SI DANS LE DOCKER -> les cellules d'initialisation ont déjà été exécutées ; si elles ne l'ont pas été, appuyer sur le bouton calculatrice.
#!pip install moment git+http://github.com/lucasiscovici/studyProject.git dill
from IPython.display import clear_output, IFrame
from IPython import display
import logging
import collections
import os
import sys
import math
import glob
import base64
sys.path.append('./lib/')
from cache import cache
from dfply import make_symbolic
from datetime import datetime, date
import moment as moment_
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl
from studyProject.utils import *
from studyProject.helpers import *
from studyProject import Datas, StudyProject, StudyClassif
from studyProject.study.studyClassif import DatasClassif
from studyProject.utils.speedMLNewMethods import Speedml3
from studyPipe.studyPipe import convert_pipe, Pipe, Pipe__
# Graph Library
import plotly
import plotly_study.graph_objs as go2
import plotly.graph_objs as go
import plotly.express as px
import plotly.offline as poff
from plotly.subplots import make_subplots
from lxml import html
import requests
from tqdm._tqdm_notebook import tqdm_notebook
#import dash
#from dash.dependencies import Input, Output
#import dash_html_components as html
#import dash_core_components as dcc
#import werkzeug,logging
#logging.getLogger('werkzeug').setLevel(logging.ERROR)
# Register tqdm's notebook progress bars on pandas (used later through `progress_apply`).
tqdm_notebook.pandas()
# NOTE(review): config_completer is not defined in this file — presumably it comes
# from one of the studyProject star imports; confirm.
config_completer()
# Initialise plotly's offline mode for notebooks.
poff.init_notebook_mode()
# Notebook-wide settings. The "momentTZ" entry is what the data loaders pass
# as `config` to moment().
configNB=dict(
    momentTZ=dict(timezone="Europe/Paris")
)
# Optional R bridge through rpy2. The constant-false guard keeps it disabled.
if False:
    import rpy2
    from rpy2.robjects.packages import importr
    from rpy2.robjects.vectors import StrVector
    # NOTE(review): `utils` is not bound anywhere in this block — presumably it
    # should be importr('utils'); confirm before enabling this code.
    _=utils.chooseCRANmirror(ind=25)
    from rpy2.robjects import pandas2ri
    pandas2ri.activate()
    import rpy2.ipython.html
    rpy2.ipython.html.init_printing()
    # IPython magic: only valid when executed inside a notebook.
    %load_ext rpy2.ipython
# Execute this code cell to execute all functions.
# Injects a script that binds Jupyter.CellToolbar.global_hide to clicks on the
# element whose data-name is "Aucun(e)".
display_html("""<script>
$('[data-name="Aucun(e)"]').on("click",Jupyter.CellToolbar.global_hide)
</script>""")
# Dictionary-based stand-in for a switch statement.
# switch(switcher: dict, default: Any) -> callable
#
# Example: switch({"foo": 10, "bar": 100}, 0)("foo") == 10
def switch(switcher,default=""):
    """Return a one-argument lookup over *switcher* that falls back to *default*."""
    def lookup(argument):
        return switcher.get(argument, default)
    return lookup
class _hideLog:
    """Context manager that sets the root logger to CRITICAL inside a ``with`` block."""

    def __enter__(self):
        root=logging.getLogger()
        # Remember the level in force so __exit__ can put it back.
        self.curr=root.level
        root.setLevel(logging.CRITICAL)
        return self

    def __exit__(self,*args,**xargs):
        logging.getLogger().setLevel(self.curr)


# Shared instance, used as ``with hideLog: ...``
hideLog=_hideLog()
def colorFader(c1,c2,mix=0):
    """Linearly interpolate between two colours and return the result via ``to_hex``.

    ``mix=0`` gives *c1*, ``mix=1`` gives *c2*.
    """
    start=np.array(mpl.colors.to_rgb(c1))
    end=np.array(mpl.colors.to_rgb(c2))
    blended=(1-mix)*start + mix*end
    return mpl.colors.to_hex(blended)
def gradientFromTwoCols(c1,c2,nb=3):
    """Return *nb* colours fading from *c1* towards *c2*.

    The mix steps are 0, 1/nb, ..., (nb-1)/nb, so *c2* itself is never reached.
    """
    steps=(index/nb for index in range(nb))
    return [colorFader(c1,c2,mix) for mix in steps]
def getLinesFromFileName(fname):
    """Open *fname* in text mode and return the list produced by ``readlines()``."""
    with open(fname,"r") as handle:
        return handle.readlines()
# Render a plain string as if it were an f-string, with kwargs as the names in scope.
def fstr(template, **kwargs):
    """Evaluate *template* as an f-string, using *kwargs* as its variables.

    Example: ``fstr("yaxis{nbPlot}.range", nbPlot="2") == "yaxis2.range"``.

    The template is turned into a literal with ``repr()``, so single quotes,
    double quotes and newlines in it no longer break the generated f-string.
    Backslashes in the template are now kept literally instead of being
    re-interpreted as escape sequences.
    """
    # NOTE: this is still eval() — never pass a template from an untrusted source.
    return eval("f" + repr(template), kwargs)
def iframe_from_html(html, width="100%",height="500px"):
    """Return an IFrame whose source is *html* encoded as a base64 ``data:`` URL."""
    encoded=base64.b64encode(html.encode('utf-8')).decode('utf-8')
    return IFrame("data:text/html;base64," + encoded, width, height)
# fonction like moment js
# use the python library moment
#
# moment() -> Actual date
# moment(2020,3,22) -> get the 22/03/20 Date
# moment(1585602607) -> get the date with timestamp
# moment("today") or moment("22/03/20") -> get the date from string
# moment("03/22/20","%m/%d/%y") -> get the date from string and format
# moment(datetime.date(2019, 4, 13)) -> get the date from datetime object
def _moment(*args_,utc=False):
from pandas._libs.tslibs.timestamps import Timestamp
args=list(args_)
if len(args)==0:
return moment_.now() if not utc else moment_.utcnow()
if len(args)==3 and type(args[0]) is int:
return moment_.date(*args) if not utc else moment_.utc(*args)
if len(args)==1 and type(args[0]) in [int,float]:
return moment_.unix(args[0],utc=utc)
if len(args)==1 and type(args[0]) is str:
return moment_.date(args[0]) if not utc else moment_.utc(args[0])
if len(args)==2 and type(args[0]) is str and type(args[1]) is str:
return moment_.date(args[0],args[1]) if not utc else moment_.utc(args[0],args[1])
if len(args)==1 and type(args[0]) is Timestamp:
args[0]=args[0].to_pydatetime()
if len(args)==1 and type(args[0]) is datetime:
return moment_.date(args[0]) if not utc else moment_.utc(args[0])
return moment_(*args,utc=utc)
def moment(*args,utc=False, config=configNB.get("moment",{})):
    """Build a Moment with _moment and convert it to ``config["timezone"]`` when that key is set.

    NOTE(review): the default looks up the key "moment" while configNB only
    defines "momentTZ", so the default config is empty unless callers pass
    ``config=`` explicitly — confirm this is intended.
    """
    built=_moment(*args,utc=utc)
    return built.timezone(config["timezone"]) if "timezone" in config else built
# Port of the moment.js `startOf` method.
#
# .startOf("year") -> copy of the date moved to the start of its year
def startOf (this_,units):
    """Return a copy of *this_* moved to the start of the given unit.

    Args:
        this_: a Moment-like object offering ``copy()``, an in-place
            ``replace(**fields)`` and a ``month`` attribute.
        units: one of 'year', 'quarter', 'month', 'week', 'day', 'date',
            'hour', 'minute', 'second'.

    Raises:
        NotImplementedError: for 'isoWeek'.
        ValueError: for any other unknown unit (this used to fail with an
            accidental "'str' object is not callable").

    Fixes over the previous version: 'month' now also resets the hours, and
    the quarter start month is computed as 1, 4, 7 or 10 (the old formula gave
    0 for January/February and 3 for March).
    """
    midnight=dict(hours=0,minutes=0,seconds=0,microseconds=0)
    # Fields to reset for each unit.
    resets={
        'year':dict(months=1,days=1,**midnight),
        'quarter':dict(days=1,**midnight),
        'month':dict(days=1,**midnight),
        'week':dict(midnight),
        'isoWeek':dict(midnight),
        'day':dict(midnight),
        'date':dict(midnight),
        'hour':dict(minutes=0,seconds=0,microseconds=0),
        'minute':dict(seconds=0,microseconds=0),
        'second':dict(microseconds=0),
    }
    if units not in resets:
        raise ValueError(f"startOf: unknown unit {units!r}")
    if units == 'isoWeek':
        raise NotImplementedError
    this=this_.copy()
    # replace() is relied on to mutate the copy in place, as before.
    this.replace(**resets[units])
    # weeks are a special case: go back to the first weekday
    if units == 'week':
        this.replace(weekday=0)
    # quarters are also special: snap the month to the first month of its quarter
    if units == 'quarter':
        this.replace(months=((this.month-1)//3)*3+1)
    return this
# Monkey patch: expose startOf as a method of moment_.Moment.
moment_.Moment.startOf=startOf
# Port of the moment.js `endOf` method.
#
# .endOf("year") -> copy of the date moved to the end of its year
def endOf (thisd,units=None):
    """Return a copy of *thisd* moved to the end of the given unit.

    With no unit, or 'millisecond', the copy is returned unchanged. Otherwise
    the result is ``startOf(unit)`` plus one unit, minus one 'ms'.

    NOTE(review): this relies on ``add()`` accepting the singular unit name as
    a keyword and on the ``subtract(1, 'ms')`` argument order — confirm both
    against the moment library.
    """
    this=thisd.copy()
    if units in (None, 'millisecond'):
        return this
    # 'date' is an alias for 'day', so treat it as such.
    unit='day' if units == 'date' else units
    addKey='week' if unit == 'isoWeek' else unit
    return this.startOf(unit).add(**{addKey:1}).subtract(1, 'ms')
moment_.Moment.endOf=endOf
# Make a function usable directly inside a pipe:
#   addToPipe(lambda a: a**2)
#   addToPipe(np.max)
def addToPipe(a, convert=False):
    """Wrap *a* in a pipe object and copy its metadata onto the wrapper.

    With ``convert`` false, *a* is wrapped in ``Pipe(..., special=True)`` around
    a function that ignores its argument and returns *a*. With ``convert``
    true, the curried function goes through ``convert_pipe``.
    """
    import functools
    if convert:
        pipedFn=convert_pipe(_c.curry(a))
    else:
        pipedFn=Pipe(lambda x: a,special=True)
    return functools.update_wrapper(pipedFn,a)
def addToPipe2(a):
    """Wrap *a* in a special Pipe that partially applies the piped value as first argument.

    NOTE(review): this uses ``c_`` whereas addToPipe uses ``_c`` — confirm both
    names exist in the star-imported helpers.
    """
    import functools

    def bindPiped(x):
        return c_.partial(a,x)

    return functools.update_wrapper(Pipe(bindPiped,special=True),a)
# JS-like forEach: apply a treatment to every value of a list.
def forEach(a,b):
    """Call ``a(item)`` for each item of *b*, in order. Returns None."""
    for item in b:
        a(item)
# Registered, in pipe form, on the class of `_ftools_`.
_ftools_.__class__.forEach=addToPipe(forEach,convert=T)
# Create a new placeholder to build functions easily.
# pdict is a copy of Pipe__'s class dict without its "_____func___" and "special" entries.
# NOTE(review): pdict is not used again in the visible part of the file.
pdict=dict(Pipe__.__dict__)
del pdict["_____func___"]
del pdict["special"]
# Distinct subclass so that callIfPipeY can tell this placeholder apart from other pipes.
class PipeY(Pipe__):pass
# Identity placeholder.
___=PipeY(lambda x:x)
# Call obj on the piped value when obj is a piped object.
# Otherwise return the piped value, or obj itself when returnObj is true.
callIfPipe=lambda pipe,obj,returnObj=False: obj._____func___(pipe) if issubclass(type(obj),Pipe) else (pipe if not returnObj else obj)
# Same as callIfPipe, but only for PipeY placeholders.
callIfPipeY=lambda pipe,obj,returnObj=False: obj._____func___(pipe) if issubclass(type(obj),PipeY) else (pipe if not returnObj else obj)
# Same idea for plain functions: call obj(info) only when obj's type is named "function".
callIfFn=lambda info,obj,returnObj=False: obj(info) if type(obj).__name__ =="function" else (info if not returnObj else obj)
# Dict whose keys and values may be piped objects: each one is resolved against the piped value.
# Keyword arguments, when given, take precedence over `dico`.
dict_=addToPipe(lambda piped,dico={},*args,**xargs: {callIfPipe(piped,k,T):callIfPipe(piped,v,T) for k,v in (dico if len(xargs)==0 else xargs).items()})
# Unpack the piped iterable into the arguments of fn (piped or plain).
iter_=addToPipe(lambda df,fn: fn._____func___(*df) if issubclass(type(fn),Pipe) else fn(*df))
# Same as dict_, registered through addToPipe2.
dict2_=addToPipe2(lambda piped,dico={},*args,**xargs: {callIfPipe(piped,k,T):callIfPipe(piped,v,T) for k,v in (dico if len(xargs)==0 else xargs).items()})
# First row/element by position.
first=lambda a:a.iloc[0]
pd.options.display.max_rows = 999


# Legacy date grouping (old groupbyDate): thin wrapper around pd.Grouper.
def _groupbyDate(self, freq, key="date", *args, **xargs):
    """Group the frame with ``pd.Grouper(key=key, freq=freq)``; extra arguments go to the Grouper."""
    grouper = pd.Grouper(*args, key=key, freq=freq, **xargs)
    return self.groupby(grouper)


pd.DataFrame.groupbyDate = _groupbyDate
# pandas groupByDate
# groupByDate2(DataFrame, String, Int, String, String)
# colDate = column holding the date
# nbJ = number of days per group
# closed = side the groups start from ("right" or "left")
# label = label each group with its left-hand date ("left") or its right-hand date
def groupByDate2(df,
                 colDate,
                 nbJ=7,
                 closed="right",
                 label="right",
                 *args,
                 **xargs):
    """Group *df* into consecutive windows of *nbJ* days and return the resulting groupby.

    The date column is assumed to be sorted in ascending order. With
    ``closed="right"`` the windows are built from the last row backwards,
    otherwise from the first row forwards. Each group is keyed by one of its
    boundary dates, chosen from *closed* and *label*.

    *args and **xargs are accepted but not used. An empty frame raises on the
    initial ``iloc[0]``.
    """
    #print("group2",df,colDate,nbJ,closed,label,args,xargs)
    dateCol=colDate
    # The data is assumed to be sorted by ascending date; for "right" we walk it in descending order.
    dfClosed=df[dateCol][::-1] if closed=="right" else df[dateCol]
    # First date seen: the lowest (closed=left) or the highest (closed=right).
    dateActuelle= dfClosed.iloc[0]
    dansLeGroupe=True # tells when the current group must be closed and a new one opened
    groups={} # maps each index of the dataframe to its group number
    indiceDatesGroups=0 # counter incremented for every new group
    datesGroups={} # maps each group number to the date to display (depends on closed and label)
    dateDebutFinGroup=dateActuelle # date at which the current group was opened
    dateFinDebutGroup=dateActuelle # date of the last element added to the current group
    for indexCurr,dateCurr in dfClosed.items():
        #print(dateCurr,dateDebutFinGroup)
        # Days between the current date and the group's opening date
        # (positive for closed "left", negative otherwise, hence the multiplication by -1).
        dateDiff=((dateCurr - dateDebutFinGroup).days)*(-1 if closed=="right" else 1)
        if dateDiff >= nbJ: # the day count is exceeded: the previous group is full and another one is created
            dansLeGroupe=False
        if not dansLeGroupe: # the group is full
            # Pick the date to display from closed and label.
            datesGroups[indiceDatesGroups]=(dateFinDebutGroup if label=="left" else dateDebutFinGroup) if closed=="right" else (dateDebutFinGroup if label=="left" else dateFinDebutGroup)
            indiceDatesGroups+=1 # move on to the next group number
            dateDebutFinGroup=dateCurr # the new group opens at the current date
            dansLeGroupe=True
        if dansLeGroupe: # the group is not full: add the current element to it
            dateFinDebutGroup=dateCurr # the group's closing date becomes the current one
            groups[indexCurr]=indiceDatesGroups
    # Record the display date of the last (still open) group.
    datesGroups[indiceDatesGroups]=(dateFinDebutGroup if label=="left" else dateDebutFinGroup) if closed=="right" else (dateDebutFinGroup if label=="left" else dateFinDebutGroup)
    datesGrouped={ i:datesGroups[j] for i,j in groups.items()} # give every row the date of its group
    #print(pd.Series(datesGrouped,name=dateCol))
    #print(dateCol)
    return df.groupby(pd.Series(datesGrouped,name=dateCol)) # build the groupby from the computed groups
pd.DataFrame.groupByDate=groupByDate2
# agg variant that takes the column -> aggregation mapping as keyword arguments
def _aggKV(self, **args):
    """Forward the keyword arguments to ``agg`` as a single dict."""
    return self.agg(args)


pd.core.groupby.generic.DataFrameGroupBy.aggKV = _aggKV


def _getGroups(self):
    """Return the sub-frame of every group, in the order of ``self.groups``."""
    return [self.get_group(key) for key in self.groups.keys()]


pd.core.groupby.generic.DataFrameGroupBy.get_groups = _getGroups


# For each value of the series keep the maximum of nb and that value
# (e.g. no value below 0 when nb=0).
def _mini(self, nb=0):
    """Clamp every value of the series from below at *nb*."""
    def clamp(g):
        return np.max([g, nb])
    return self.apply(clamp)


pd.Series.mini = _mini
# Expose the dfply verbs mutate, filter_by, group_by and select as DataFrame methods.
def _dfplyMutate(self, **xargs):
    """Pipe the frame into ``df.mutate``."""
    return self >> df.mutate(**xargs)


def _dfplyFilterBy(self, *args, **kwargs):
    """Pipe the frame into ``df.filter_by``."""
    return self >> df.filter_by(*args, **kwargs)


def _dfplyGroupBy(self, *args, **kwargs):
    """Pipe the frame into ``df.group_by``."""
    return self >> df.group_by(*args, **kwargs)


def _dfplySelect(self, *args, **xargs):
    """Pipe the frame into ``df.select``."""
    return self >> df.select(*args, **xargs)


pd.DataFrame.mutate = _dfplyMutate
pd.DataFrame.filter_by = _dfplyFilterBy
pd.DataFrame.group_by = _dfplyGroupBy
pd.DataFrame.select = _dfplySelect
def findColNameInPandasDf(df,n):
    """Return the name of the column at position *n*; negative positions count from the end."""
    position = n if n>=0 else len(df.columns)+n
    return df.columns[position]


# Rename columns of a frame easily, by position or by name.
def _renameCols(self,dico={},**xargs):
    """Rename columns; integer keys are positions, any other key is a column name."""
    mapping = {}
    for k, v in {**dico,**xargs}.items():
        column = findColNameInPandasDf(self,k) if type(k) is int else k
        mapping[column] = v
    return self.rename(columns=mapping)


pd.DataFrame.renameCols = _renameCols
# Cut a series easily.
# Either pass bins and labels, or pass dico
# (dico keys are the labels, dico values are the bins).
def cutSeries(self,bins=None,labels=None,dico=None):
    """Bin the series with ``pd.cut``; *dico*, when given, overrides *bins* and *labels*.

    NOTE(review): *dico* yields as many bins as labels, whereas pd.cut with a
    list of edges expects one label fewer than edges — confirm how the dico
    form is meant to be used.
    """
    if dico is not None:
        labels, bins = list(dico.keys()), list(dico.values())
    return pd.cut(self,bins=bins,labels=labels)
pd.Series.cut=cutSeries
# Easily concatenate dataframes row-wise.
def _concatWithRows(self, df2, ignore_index=True):
    """Stack *df2* below the frame (``pd.concat`` on axis 0)."""
    frames = [self, df2]
    return pd.concat(frames, axis=0, ignore_index=ignore_index)


pd.DataFrame.concatWithRows = _concatWithRows
#@functools.wraps(groupByDate2)
def byDate(self,fun=lambda x:x,colGroupBy=None,*args,**xargs):
    """Apply groupByDate to every group of this groupby, then *fun*, and concatenate the results.

    Args:
        fun: piped object, PipeY placeholder or plain function applied to each
            per-group date groupby (tried in that order through the callIf* helpers).
        colGroupBy: column name(s) written back onto each result so that the
            group key is kept. A list matching the key tuple is expected when
            the group keys are not strings.
        *args, **xargs: forwarded to ``groupByDate``.

    Raises:
        Exception: when the keys are not strings and colGroupBy is None.

    NOTE(review): with string keys colGroupBy is not checked for None, and an
    empty groupby fails on ``keys[0]`` — confirm whether callers can hit either case.
    """
    #print("ByDate",self,fun,args,xargs)
    keys=list(self.groups.keys())
    groupsDF=[self.get_group(i) for i in keys]
    groupsDFByDate=[callIfFn(callIfPipeY(callIfPipe(i.groupByDate(*args,**xargs),fun),fun),fun) for k,i in zip(keys,groupsDF)]
    #print(type(keys[0]))
    #print(groupsDFByDate[0])
    if type(keys[0]) is str:
        # Single string key: write it into the colGroupBy column.
        groupsDFByDate=[i.mutate(**{colGroupBy:k}) for k,i in zip(keys,groupsDFByDate)]
    else:
        if colGroupBy is None:
            raise Exception("when keys are not string ( multi-index) colGroupBy must be set")
        # Multi-column key: write every key component into its own column.
        def addIndexToDf(df,indexs,colGroupBy=colGroupBy):
            df2=df
            for indexName,indexValue in zip(colGroupBy,indexs):
                df2=df2.mutate(**{indexName:indexValue})
            return df2
        groupsDFByDate=[addIndexToDf(i,k) for k,i in zip(keys,groupsDFByDate)]
    return pd.concat(groupsDFByDate,axis=0)
pd.core.groupby.generic.DataFrameGroupBy.byDate=byDate
# Group by colGroupBy, apply byDate (date windows on colDate, then fun) inside each group,
# and put the grouping column(s) first in the result.
pd.DataFrame.groupbyAndDate=(lambda self,colGroupBy,fun,colDate="Date",*args,**xargs:self.groupby(colGroupBy)
                             .byDate(fun,colDate=colDate,colGroupBy=colGroupBy,*args,**xargs)
                             .reset_index().select(colGroupBy,df.everything()))
# mutate + studyPipe: the dict of new columns may itself be a piped object.
def _mutatePiped(self, dico):
    """Resolve *dico* against the frame with callIfPipe, then mutate with the result."""
    return self.mutate(**(callIfPipe(self,dico)))


# First row of the frame (as a one-row frame); the frame itself when empty.
def _begin(self):
    if self.shape[0]>0:
        return self.iloc[[0],:]
    return self


# Last row of the frame (as a one-row frame); the frame itself when empty.
def _end(self):
    if self.shape[0]>0:
        return self.iloc[[-1],:]
    return self


def _toDatetime(self, *args, **xargs):
    """Method form of ``pd.to_datetime`` for a series."""
    return pd.to_datetime(self,*args,**xargs)


pd.DataFrame.mutate_ = _mutatePiped
pd.DataFrame.begin = _begin
pd.DataFrame.end = _end
pd.Series.to_datetime = _toDatetime
def cumulToOne(df,colCumul, colSuffix="ByDay",indexCol="Country_Region",fill="first"):
    """Add a column holding the row-to-row increase of a cumulative column, per group.

    The new column is named ``colCumul + colSuffix``. Within each *indexCol*
    group it is the difference with the previous row, floored at 0 through
    ``mini()`` and cast to int. The row without a predecessor gets 0 when
    *fill* is None, the group's first cumulative value when *fill* is
    "first", and *fill* itself otherwise.
    """
    if fill is None:
        fillValue=0
    elif fill=="first":
        fillValue=X[colCumul].iloc[0]
    else:
        fillValue=fill
    #print(df)
    increments=(X[colCumul]-X[colCumul].shift()).fillna(fillValue).mini().astype(int)
    return df.group_by(indexCol).mutate(**{colCumul+colSuffix:increments})
cumulToOne_=addToPipe(cumulToOne)
def dateToStrDate(df,colDate="Date",colSuffix="",formatDate="%d/%m/%y"):
    """Write *colDate* formatted with ``strftime(formatDate)`` into the column ``colDate + colSuffix``."""
    target=colDate+colSuffix
    formatted=X[colDate].dt.strftime(formatDate)
    return df.mutate(**{target:formatted})
dateToStrDate_=addToPipe(dateToStrDate)
def addNbCount(df):
    """Add a column ``nb`` equal to 1 on every row."""
    withCount=df.mutate(nb=1)
    return withCount
addNbCount_=addToPipe(addNbCount)
# Subplot helper: turn the title of a figure into an annotation above its subplot.
def titleToAnnotation(a, b, x, y, nb):
    """Add the title of figure *b* to figure *a* as an annotation for subplot number *nb*.

    The annotation is centred on the x domain of the subplot and placed 0.1
    above the top of its y domain, in paper coordinates. *a* is returned
    untouched when *b* has no title. *x* and *y* are accepted but not used.
    """
    texto=b.layout.title.text
    if not texto:
        return a
    suffix="" if nb==1 else str(nb)
    yDomain=a.layout["yaxis"+suffix]["domain"]
    xDomain=a.layout["xaxis"+suffix]["domain"]
    annotation=dict(yref="paper",
                    showarrow=F,
                    x=np.mean([xDomain[1],xDomain[0]]),
                    y=yDomain[1]+0.1,
                    font=dict(size=16),
                    xref="paper",
                    text=texto,
                    yanchor="top",
                    xanchor="center",
                    textangle=0)
    return a.update_layout(annotations=list(a.layout.annotations) + [annotation])
def add_to_subplots(a,b,layoutToSave=['yaxis{nbPlot}.range'],customFn=[titleToAnnotation]):
    """Add the first trace of figure *b* to the next free cell of subplot figure *a*.

    Args:
        a: subplot figure; its ``_grid_ref`` gives the grid size and
            ``len(a.data)`` the number of cells already filled.
        b: figure whose first trace, and selected layout entries, are copied.
        layoutToSave: dotted layout paths, with a ``{nbPlot}`` placeholder, to
            copy from *b*'s layout onto the matching axis of *a*.
        customFn: callables ``f(a, b, row, col, plotNumber)`` returning the
            updated *a*; they run before the trace is added.

    Returns:
        The updated figure *a*.
    """
    nbCols=len(a._grid_ref[0])
    # NOTE(review): nbRows and nbC are computed but never used below.
    nbRows=len(a._grid_ref)
    nbC=len(filterl(None,np.ravel(a._grid_ref)))
    nbD=len(a.data)
    # 1-based row/column of the next free cell, filling the grid row by row.
    rowsCurr=nbD//nbCols+1
    colsCurr=nbD%nbCols+1
    #print(nbCols,nbRows,nbC,nbD,rowsCurr,colsCurr)
    nbPlot_=nbD+1
    # Axis suffix: the first plot uses "yaxis", the following ones "yaxis2", "yaxis3", ...
    nbPlot= "" if nbPlot_ == 1 else f"{nbPlot_}"
    for i in customFn:
        a=i(a,b,rowsCurr,colsCurr,nbPlot_)
    a.add_trace(b.data[0],row=rowsCurr,col=colsCurr)
    if len(layoutToSave)>0:
        for i in layoutToSave:
            # ii is the path on the target figure (with the axis suffix),
            # ii2 the same path on the source figure (no suffix).
            ii=fstr(i,nbPlot=nbPlot)
            ii2=fstr(i,nbPlot="")
            #print(ii,ii2,i,nbPlot)
            bb=b.layout
            # NOTE(review): c and tt are never read.
            c=dict()
            ok=True
            tt=True
            iis=ii.split(".")
            iis2=ii2.split(".")
            iisS=iis[::-1]
            #print(iis,iis2)
            but=None
            # Walk down the source layout along the path; give up when a component is missing.
            for j_,(j,j2) in enumerate(zip(iis,iis2)):
                #print(j_,j,j2,b,j2 in b)
                if j2 in bb:
                    bb=bb[j2]
                    #print(bb,j,j2,j_)
                    #c[j]=dict() if j_+1<len(iis) else bb
                    #print(c)
                    if len(iis)==j_+1:
                        # Last component: keep the value, as plain JSON when it is a layout object.
                        but=bb.to_plotly_json() if issubclass(type(bb),plotly.basedatatypes.BaseLayoutHierarchyType) else bb
                else:
                    ok=False
                    break
            #print(c,but,ok)
            if ok:
                # Rebuild the nested dict from the innermost key outwards, then apply it to the target.
                for j in iisS:
                    o={j:but}
                    but=o
                #print(o)
                a=a.update_layout(**o)
    return a
# Allow the + operator between two plotly (or two plotly_study) figures.
def isSubplot(fig):
    """Tell whether *fig* has a ``_grid_ref`` with more than one cell."""
    if not hasattr(fig,"_grid_ref"):
        return False
    gridShape=np.shape(fig._grid_ref)
    return (gridShape[0]*gridShape[1])>1


def _makeFigureAdd(figureCls):
    """Build the ``__add__`` used for figures of class *figureCls*."""
    def __add__(self, other):
        # Anything that is not exactly a figureCls is ignored.
        if other.__class__ is not figureCls:
            return self
        if isSubplot(self):
            return add_to_subplots(self,other)
        return self.add_trace(other.data[0])
    return __add__


go.Figure.__add__ = _makeFigureAdd(go.Figure)
go2.Figure.__add__ = _makeFigureAdd(go2.Figure)
# Add a second y axis (yaxis2) to an existing figure.
def addSecondAxis(plotyFig):
    """Copy the figure's yaxis layout into a right-hand ``yaxis2`` and move the first trace onto it."""
    secondAxis=plotyFig.layout["yaxis"].to_plotly_json().copy()
    secondAxis.update(
        anchor="x",
        overlaying="y",
        side="right",
        gridcolor='#E1E5ED',
        showgrid=True,
        tickfont={'color': '#4D5663'},
        title={'font': {'color': '#4D5663'}, 'text': ''},
        zerolinecolor='#E1E5ED',
    )
    plotyFig.layout["yaxis2"]=secondAxis
    plotyFig.data[0].yaxis="y2"
    return plotyFig
addSecondAxis_ = addToPipe(addSecondAxis)
# Add a range slider to a plotly figure.
def addSlider(plotyFig):
    """Return the figure after setting ``xaxis_rangeslider_visible=True`` on its layout."""
    options={"xaxis_rangeslider_visible":True}
    return plotyFig.update_layout(**options)
addSlider_ = addToPipe(addSlider)
# Pipe-ready ("_"-suffixed) variants of the plotly express chart builders,
# so that they can be used directly in a pipe (|).
def _addPipedVariants(module, names=("line", "bar", "scatter", "choropleth")):
    """Attach ``<name>_ = addToPipe(module.<name>)`` to *module* for every listed name."""
    for name in names:
        setattr(module, name + "_", addToPipe(getattr(module, name)))


_addPipedVariants(pex)
_addPipedVariants(px)


def showLegend(a):
    """Turn the legend on for every trace of the figure."""
    return a.update_traces(showlegend=True)


showLegend_=addToPipe(showLegend)


def _updateLayout(a,*args,**xargs):
    """Plain-function form of ``figure.update_layout``."""
    return a.update_layout(*args,**xargs)


update_layout_=addToPipe(_updateLayout)
# Build a hover template easily.
# args = labels of the variables to add to the template (read from customdata, in order)
# dico = key/value pairs: the key is the value to display, the value is its label
def hoverTemplate(*args,dicoFirst=False,**dico):
    """Return a hover template made of one "<i>label</i> : %{value}" line per entry, joined with <br>.

    Positional labels are bound to ``customdata[0]``, ``customdata[1]``, ...
    The keyword entries come first when *dicoFirst* is true, last otherwise.
    """
    dicoValues=list(dico.keys())
    dicoLabels=list(dico.values())
    argValues=range(len(args)) | _ftools_.mapl("customdata[{}]")
    argLabels=list(args)
    if dicoFirst:
        dicoValues.extend(argValues)
        dicoLabels.extend(argLabels)
        values,labels=dicoValues,dicoLabels
    else:
        argValues.extend(dicoValues)
        argLabels.extend(dicoLabels)
        values,labels=argValues,argLabels
    return "<br>".join(f"<i>{label}</i> : %{{{value}}}" for value,label in zip(values,labels))
hoverTemplate_ = addToPipe(hoverTemplate)
# Convert a matplotlib figure to plotly.
def mpl_to_plotly2(fig=None):
    """Convert *fig* with ``plotly.tools.mpl_to_plotly``; the current figure is used when *fig* is None."""
    from plotly.tools import mpl_to_plotly as mpl_to_plotly_
    if fig is None:
        fig=plt.gcf()
    return mpl_to_plotly_(fig)
# Build two html tabs from two figures.
def tabs(fig1,fig2, fig1Name="Absolue", fig2Name="Log",fig1Plus="",fig2Plus="",
         maxHeight="500px",marginTopRate=100,marginTopMin=-450,marginTop="-450px"):
    """Return an HTML object showing *fig1* and *fig2* in two switchable tabs.

    Args:
        fig1, fig2: objects with ``to_html`` (rendered through it) or with a
            ``data`` attribute (used as is); anything else must already be an
            HTML string.
        fig1Name, fig2Name: captions of the two tab buttons.
        fig1Plus, fig2Plus: extra HTML appended below each figure.
        maxHeight: CSS height of the scrolling container.
        marginTopRate, marginTopMin: the top margin of the second tab is
            ``marginTopMin - marginTopRate * nesting`` pixels, where nesting is
            ``fig1.__tabs_nb__`` when fig1 is itself the result of tabs().
        marginTop: NOTE(review) always overwritten by the computed value, so
            the argument has no effect.

    All ids, class names and the JS function name are suffixed with a random
    string so that several tab widgets can coexist on a page.
    """
    randomNumber=randomString()
    nb1=0
    if hasattr(fig1,"__tabs_nb__"):
        nb1= fig1.__tabs_nb__
    marginTop=marginTopMin-marginTopRate*nb1
    marginTop=f"{marginTop}px"
    if hasattr(fig1,"to_html"):
        fig1=fig1.to_html(include_plotlyjs="require",auto_play=False)
    elif hasattr(fig1,"data"):
        fig1=fig1.data
    if hasattr(fig2,"to_html"):
        fig2=fig2.to_html(include_plotlyjs="require",auto_play=False)
    elif hasattr(fig2,"data"):
        fig2=fig2.data
    # The template uses {name} markers filled in with str.replace (not str.format),
    # so the CSS/JS braces need no escaping.
    gg= HTML("""
<button id="tab1{random}" class="active-me{random}" onclick="selectTab{random}(1,this);">{fig1Name}</button>
<button id="tab2{random}" onclick="selectTab{random}(2,this);">{fig2Name}</button>
<br/>
<div id="kk{random}">
<div id="tab1Content{random}">
{tab1}
{fig1Plus}
</div>
<div id="tab2Content{random}" class="hidden{random}">
{tab2}
{fig2Plus}
</div>
</div>
<style>
#tab1Content{random} {
position: static;
}
#tab2Content{random} {
width: 100%;
margin-top: {marginTop};
position: relative;
}
.hidden{random} {
margin-left: 100000px;
}
#kk{random} {
width:100%;
height:{maxH};
overflow:scroll;
}
.active-me{random} {
color: white;
background-color:gray;
}
</style>
<script>
function selectTab{random}(tabIndex,th) {
//Hide All Tabs
document.getElementById('tab1Content{random}').classList.remove("hidden{random}");
document.getElementById('tab1Content{random}').classList.add("hidden{random}");
document.getElementById('tab2Content{random}').classList.remove("hidden{random}");
document.getElementById('tab2Content{random}').classList.add("hidden{random}");
document.getElementById('tab1{random}').classList.remove("active-me{random}")
document.getElementById('tab2{random}').classList.remove("active-me{random}")
th.classList.add("active-me{random}")
//Show the Selected Tab
document.getElementById('tab' + tabIndex + 'Content{random}').classList.remove("hidden{random}");
}
</script>
""".replace("{tab1}",fig1 )
    .replace("{tab2}",fig2 )
    .replace("{random}",randomNumber)
    .replace("{fig2Name}",fig2Name)
    .replace("{fig1Name}",fig1Name)
    .replace("{fig1Plus}",fig1Plus)
    .replace("{fig2Plus}",fig2Plus)
    .replace("{maxH}",maxHeight)
    .replace("{marginTop}",marginTop)
    )
    # NOTE(review): min(nb1+1,1) is always 1 for nb1 >= 0, so the nesting level never exceeds 1.
    gg.__tabs_nb__=min(nb1+1,1)
    return gg
# Display a dash dashboard inside a notebook.
def show_app(app,  # type: dash.Dash
             port=10000,
             width=700,
             height=350,
             offline=True,
             style=True,
             **dash_flask_kwargs):
    """
    Show an iframe pointing at the application, then run it from the notebook.

    :param app: the dash application
    :param port: port the server listens on (the iframe points at it too)
    :param width: width of the iframe
    :param height: height of the iframe
    :param offline: when true, css and scripts are served locally
    :param style: when true, external stylesheets and scripts are appended
    :param dash_flask_kwargs: forwarded to ``app.run_server``
    :return: whatever ``app.run_server`` returns
    """
    url = 'http://0.0.0.0:%d' % port
    iframe = f'<iframe src="{url}" width={width} height={height}></iframe>'
    display.display_html(iframe, raw=True)

    if offline:
        for assets in (app.css, app.scripts):
            assets.config.serve_locally = True

    if style:
        stylesheets = (
            "https://fonts.googleapis.com/css?family=Raleway:400,300,600",
            "https://maxcdn.bootstrapcdn.com/font-awesome/4.7.0/css/font-awesome.min.css",
            "http://getbootstrap.com/dist/css/bootstrap.min.css",
        )
        for sheet in stylesheets:
            app.css.append_css({"external_url": sheet})
        scripts = (
            "https://code.jquery.com/jquery-3.2.1.min.js",
            "https://cdn.rawgit.com/plotly/dash-app-stylesheets/a3401de132a6d0b652ba11548736b1d1e80aa10d/dash-goldman-sachs-report-js.js",
            "http://getbootstrap.com/dist/js/bootstrap.min.js",
        )
        for script in scripts:
            app.scripts.append_script({"external_url": script})

    # debug needs to be false in Jupyter
    return app.run_server(debug=False,
                          port=port,
                          host="0.0.0.0",
                          **dash_flask_kwargs)
# Helpers wrapped with make_symbolic so they can be used directly in dfply expressions.
# log function
@make_symbolic
def log_(series):
    """Return ``np.log(series)``."""
    logged=np.log(series)
    return logged
# 0 -> NaN
@make_symbolic
def zeroNan(a):
    """Replace every value equal to 0 with NaN, element by element."""
    def blankZero(d):
        return np.nan if d==0 else d
    return a.apply(blankZero)
# "rien" (nothing): hand back the argument as is.
@make_symbolic
def rien(s):
    """Identity: return *s* unchanged."""
    unchanged=s
    return unchanged
# R helpers, only defined when the flag F is true.
# NOTE(review): importr is only imported inside the disabled rpy2 block near the
# top of the file — confirm it is in scope before enabling this.
if F:
    # Check whether the package "pkg" exists in R.
    def rPackageExist(pkg):
        """Return True when ``importr(pkg)`` succeeds, False when it raises."""
        try:
            importr(pkg)
        except Exception:
            # Was a bare `except:`, which also swallowed KeyboardInterrupt and SystemExit.
            return False
        return True

    # Get the package "pkg" from R, installing it first when it is missing.
    def rInstallIfNotExistPackage(pkg):
        """Install *pkg* through R's utils when it cannot be imported, then return ``importr(pkg)``."""
        utils = importr('utils')
        if not rPackageExist(pkg):
            print(f"install R package {pkg}...")
            utils.install_packages(pkg)
        return importr(pkg)
    #factoMineR=rInstallIfNotExistPackage("FactoMineR")
    #factoextra=rInstallIfNotExistPackage("factoextra")
def getDataX(X,ok=True, variable=None, variableOk=True):
    """Report whether dataset *X* is already in the project, optionally binding it to a global.

    Args:
        X: id of the dataset in ``Covidproj.data``.
        ok: flag, or a callable returning the flag, telling whether the stored
            data can be used. Any callable is now invoked (previously only
            objects whose type is named "function").
        variable: when given, name of the module-level variable that receives
            ``Covidproj.data[X].dataTrain.X``.
        variableOk: set to False to skip that assignment.
    """
    if callable(ok):
        ok=ok()
    if X in Covidproj.data and ok:
        print(f"{X} déjà dans le projet")
        if variable is not None and variableOk:
            globals()[variable] = Covidproj.data[X].dataTrain.X
    else:
        # The backslash is escaped: "\ " is an invalid escape sequence and
        # triggers a warning on recent Python versions. The printed text is unchanged.
        print(f"/!\\ {X} pas exporté")
def getDataXX(X,ok,variable=None,fn=lambda:None,force=False,fun=None, export=True):
    """Load dataset *X* with *fn* and store it in the project when it is missing or not ok.

    Args:
        X: id of the dataset in ``Covidproj.data``.
        ok: when falsy, the data is reloaded even if *X* is already stored.
        variable: when given, name of the module-level variable that receives
            the loaded data (before *fun* is applied).
        fn: loader; it is called with *force* when it declares at least one
            argument, without arguments otherwise.
        force: value forwarded to *fn*.
        fun: optional transformation applied to the data before it is stored.
        export: when true, ``Covidproj.export()`` is called after saving.
    """
    global Covidproj
    if X not in Covidproj.data or not ok:
        print(f"{X} not in project")
        covidData = fn(force) if len(get_args(fn).names) >0 else fn() # fetch the data
        if variable is not None:
            globals()[variable] = covidData
        if fun is not None:
            covidData=fun(covidData)
        # Add the data to the project; the other arguments are empty placeholders.
        Covidproj.saveDatasWithId(X,covidData,pd.Series(name="__fake__"),
                                  pd.DataFrame(),pd.Series(name="__fake2__"))
        if export:
            Covidproj.export()
        print("ok")
# Pipe form of getDataX: for every entry of the piped list, call getDataX with its first x items.
getDataX_=addToPipe(lambda x=3: (lambda a: [ getDataX(*i[:x]) for i in a] ))
# Pipe form of getDataXX: for every entry of the piped list, call getDataXX with the entry's items;
# when sup is given, the item at index sup-1 is left out.
getDataXX_=addToPipe(lambda sup=None:(lambda a: [ getDataXX(*(i if sup is None else i[:sup-1]+i[sup:])) for i in a] ))
Confirmed & Deaths World
# Get world data about confirmed cases and deaths.
def getData(force=False,
            silent=False):
    """Return the world "Deaths" and "Confirmed" time series joined in long format.

    Each series is cached in a dated csv under ``data/``; it is downloaded
    again when today's file is missing or when *force* is true.

    Args:
        force: download even when today's cache files exist.
        silent: when true, only report (as a bool) whether today's "Deaths"
            cache file exists, without loading anything.

    Raises:
        subprocess.CalledProcessError: when the ``curl`` download fails.
            Failures were previously ignored and surfaced later as a csv
            parsing error.
    """
    import subprocess

    def _getDataByValue(url=None,value=None,force=False,silent=False):
        today = moment(config=configNB.get("momentTZ",{})).date
        fname="data/covid_19_data_{}_times_{}.csv".format(value, today.strftime("%Y_%m_%d"))
        #glob.glob("data/covid_19_data_*")
        if silent:
            return os.path.isfile(fname)
        if os.path.isfile(fname) and not force:
            return pd.read_csv(fname)
        print(f"load data {value}...")
        tmpName="data/_covid_19_data_times2.csv"
        try:
            # Argument list instead of a shell string: no quoting issues with
            # the url, and a failed download raises instead of going unnoticed.
            subprocess.run(["curl","--fail",url,"-o",tmpName],check=True)
            dataImported=pd.read_csv(tmpName)
        finally:
            # The temporary download is removed even when something fails.
            if os.path.isfile(tmpName):
                os.remove(tmpName)
        # Every column after "Long" is a date: gather them into (Date, value) rows.
        dataColumns=(dataImported >> df.select(~df.columns_to("Long",inclusive=True))).columns
        dataWide=dataImported >> df.gather('Date', value, dataColumns)
        dataWide.columns=dataWide.columns.map(lambda a:a.replace("/","_"))
        dataWide.to_csv(fname,index=False)
        #pd.read_csv("https://opendata.ecdc.europa.eu/covid19/casedistribution/csv")
        return dataWide

    if silent:
        return _getDataByValue(value="Deaths",silent=True)
    deaths=_getDataByValue("https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_deaths_global.csv",
                           "Deaths",
                           force)
    confirmed=_getDataByValue("https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv",
                              "Confirmed",
                              force)
    allDataCovid=deaths >> df.inner_join(confirmed,
                                         by=["Province_State",
                                             "Country_Region",
                                             "Lat",
                                             "Long",
                                             "Date"])
    # Sanity check: the join must neither drop nor duplicate rows.
    if not (deaths.shape[0] == confirmed.shape[0] and confirmed.shape[0] == allDataCovid.shape[0]):
        # Escaped backslash: "\ " is an invalid escape sequence; the printed text is unchanged.
        print("ERROR: /!\\ pb de join",deaths,confirmed,allDataCovid,deaths.shape[0] == confirmed.shape[0],confirmed.shape[0] == allDataCovid.shape[0])
        #return None
    return allDataCovid
# NOT NEEDED NOW
# Get French data about confirmed cases and deaths.
def getDataFr(force=False,
              silent=False):
    """Currently disabled: always returns True.

    Everything after the first ``return`` is unreachable and kept for reference only.
    """
    return True
    today = moment(config=configNB.get("momentTZ",{})).date
    fname="data/chiffres-cles_{}.csv".format(today.strftime("%Y_%m_%d"))
    if silent:
        return os.path.isfile(fname)
    if not os.path.isfile(fname) or force:
        print("load data fr...")
        dirTmp=TMP_DIR()
        os.system("git clone https://github.com/opencovid19-fr/data.git "+dirTmp.get())
        # NOTE(review): this string has no f prefix, so "{dirTmp.i}" is passed to the
        # shell literally — fix before re-enabling this code.
        os.system("cp -r {dirTmp.i} /sante-publique-france covidD/")
        dirTmp.delete()
        os.system("cd covidD && node build")
        frData = pd.read_csv("covidD/dist/chiffres-cles.csv")
        frData=frData >> df.filter_by(X.granularite=="pays") >> df.select("date",df.columns_between(X.cas_confirmes,X.gueris))
        frData.to_csv(fname,index=False)
    else:
        frData=pd.read_csv(fname)
    return frData
Tests in World with Date
# Get worldwide covid-19 testing data.
def getDataTests(fname="./data/testsWorld.csv",force=False,silent=False):
    """Return, for each country, the latest known number of tests per 100,000 people.

    Args:
        fname: csv used as cache for the result.
        force: rebuild the result even when *fname* exists.
        silent: when true, only report (as a bool) whether *fname* exists.

    NOTE(review): the rebuild branch reads "data/testsWorld.csv", which is the
    same file as the default *fname*. With the defaults it is therefore read
    when it does not exist, or overwritten with the result when *force* is
    used — confirm the intended input file.
    """
    if silent:
        return os.path.isfile(fname)
    if force or not os.path.isfile(fname):
        # Source given per million people: divide the last column by 10 to get a per-100,000 figure.
        testsPer100000=(
            pd.read_csv("data/covid19-tests-per-million-people.csv").dropna()
            .mutate_(dict_({__.columns[-1]:__.iloc[:,-1]/10.}))
            .renameCols({-1:"Total Covid19 Test per 100,000"})
        )
        # Second source given per 10,000 people: multiply by 10.
        testsWorld=pd.read_csv("data/testsWorld.csv").mutate(testsPer100000=X.testsPer10000*10)
        # Stack both sources, keep the columns without missing values, and keep
        # the most recent row of every country.
        globalTestsWold100000=(
            testsPer100000
            .mutate(Date=X.Date.apply(pd.to_datetime))
            .renameCols({-1:"testsPer100000"},Entity="Country")
            .concatWithRows(testsWorld
                            .mutate(Date=X.Date.apply(pd.to_datetime)))
            .dropna(axis=1)
            .sort_values(["Country","Date"])
            .groupby(["Country"])
            .last()
            .reset_index()
        )
        globalTestsWold100000.to_csv(fname,index=False)
    else:
        globalTestsWold100000=pd.read_csv(fname)
    return globalTestsWold100000
hospitalisation en France
def getDataHospitFrance(force=False,
                        silent=False):
    """Return the French hospital data, cached in a dated csv under ``data/``.

    The download link is the first "[download]" element of the first resource
    card on the data.gouv.fr dataset page; the csv behind it is read with
    ";" as separator.

    Args:
        force: download even when today's cache file exists.
        silent: when true, only report (as a bool) whether today's cache file exists.
    """
    today = moment(config=configNB.get("momentTZ",{})).date
    stamp=today.strftime("%Y_%m_%d")
    fname="data/covid19DataHospitFr_{}.csv".format(stamp)
    if silent:
        return os.path.isfile(fname)
    if not force and os.path.isfile(fname):
        return pd.read_csv(fname)
    print(f"load data {stamp}...")
    page = requests.get('https://www.data.gouv.fr/fr/datasets/donnees-hospitalieres-relatives-a-lepidemie-de-covid-19')
    tree = html.fromstring(page.content)
    firstCard=tree.cssselect(".dataset-container .resources-list .resource-card")[0]
    url=firstCard.cssselect("[download]")[0].get("href")
    dataFrHospit=pd.read_csv(url,sep=";")
    dataFrHospit.to_csv(fname,index=False)
    return dataFrHospit
#TODO: AJOUTER DATA QUOTIDIENNE
# Get the population by country, broken down by age bracket.
def getDataPopu(fname="./data/popu.csv", force=False, silent=False):
    """Return the 2020 population per country and age bracket, in long format.

    Args:
        fname: csv used as cache for the result.
        force: rebuild the result even when *fname* exists.
        silent: when true, only report (as a bool) whether *fname* exists.

    Changes over the previous version: the separator is passed to ``read_csv``
    by keyword (positional use is rejected by recent pandas), and the five
    copy-pasted bracket computations are driven by one table.
    """
    if silent:
        return os.path.isfile(fname)
    if force or not os.path.isfile(fname):
        popu=(
            pd.read_csv("./data/WPP2019_POP_F15_1_ANNUAL_POPULATION_BY_AGE_BOTH_SEXES/ESTIMATES-Tableau 1.csv",
                        sep=";",
                        skiprows=range(16))
            .reset_index(drop=T)
        ) >> df.drop(range(2))
        # select the right rows and columns
        popuMonde=(popu
                   >> df.filter_by(X.Type=="Country/Area",
                                   X.iloc[:,5]==2020)
                   >> df.select(0,df.columns_from(6))
                   >> df.rename(pays=0)) | __.reset_index(drop=True)
        # group into brackets A (0-14), B (15-44), C (45-64), D (65-74), E (75+)
        tranches={
            "A":['0-4', '5-9', '10-14'],
            "B":['15-19','20-24','25-29','30-34','35-39','40-44'],
            "C":['45-49','50-54','55-59','60-64'],
            "D":['65-69','70-74'],
            "E":['75-79','80-84','85-89','90-94','95-99','100+'],
        }
        # The source figures are strings with spaces as thousands separators.
        for code,cols in tranches.items():
            popuMonde["Code tranches d'age_"+code]=(
                popuMonde.loc[:,cols]
                .transform(lambda a:a.str.replace(" ","").astype(int))
                .sum(axis=1)
            )
        for cols in tranches.values():
            popuMonde=popuMonde.drop(cols,axis=1)
        # wide -> long: one row per country and bracket, plus the country total in `popu`
        popuMondeTranches=(
            (popuMonde >> df.mutate( popu = X.iloc[:,1:].sum(axis=1) ) >> df.gather("tranches_age","value", df.starts_with("Code"))) >>
            df.mutate(tranches_age=X.tranches_age.str.replace("Code tranches d'age_",""))
        )
        popuMondeTranches.to_csv(fname,index=False)
    else:
        popuMondeTranches=pd.read_csv(fname)
    return popuMondeTranches
# Get the median age by country.
def getDataMedianAge(fname="./data/medianAgeWorld.csv",force=False,silent=False):
    """Return the 2020 median age per country, cached in *fname*.

    Args:
        fname: csv used as cache for the result.
        force: rebuild the result even when *fname* exists.
        silent: when true, only report (as a bool) whether *fname* exists.
    """
    if silent:
        return os.path.isfile(fname)
    if not force and os.path.isfile(fname):
        return pd.read_csv(fname)
    renamed=pd.read_csv("data/median-age.csv").renameCols({-1:"medianAge","Entity":"Country"})
    # Keep the rows of year 2020 and only the last column.
    year2020=renamed.set_index(["Country","Year"]).loc(axis=0)[:,2020]
    medianAge=year2020.iloc[:,[-1]].reset_index()
    medianAge.to_csv(fname,index=False)
    return medianAge
# Get the urbanisation rate by country.
def getDataUrba(fname="./data/tauxUrbaWorld.csv", force=False, silent=False):
    """Return the first and third-from-last columns of data/tauxUrba.csv, cached in *fname*.

    Args:
        fname: csv used as cache for the result.
        force: rebuild the result even when *fname* exists.
        silent: when true, only report (as a bool) whether *fname* exists.
    """
    if silent:
        return os.path.isfile(fname)
    if not force and os.path.isfile(fname):
        return pd.read_csv(fname)
    source=pd.read_csv("data/tauxUrba.csv",
                       sep=";",
                       skiprows=range(4))
    tauxUrba=source.iloc[:,[0,-3]]
    tauxUrba.to_csv(fname,index=False)
    return tauxUrba
# Get the population density (per km^2).
def getDataPopuDensity(fname="./data/popDensityWorld.csv", force=False, silent=False):
    """Return the first and third-from-last columns of data/pop_density.csv, cached in *fname*.

    Args:
        fname: csv used as cache for the result.
        force: rebuild the result even when *fname* exists.
        silent: when true, only report (as a bool) whether *fname* exists.
    """
    if silent:
        return os.path.isfile(fname)
    if not force and os.path.isfile(fname):
        return pd.read_csv(fname)
    source=pd.read_csv("data/pop_density.csv",
                       sep=";",
                       skiprows=range(4))
    popdensity=source.iloc[:,[0,-3]]
    popdensity.to_csv(fname,index=False)
    return popdensity
def getDepartementsLatLon(fname="./data/depLatLon.csv", force=False, silent=False):
    """Return one row per French departement with its coordinates, cached at ``fname``.

    Args:
        fname: path of the cached CSV.
        force: rebuild the cache even when ``fname`` already exists.
        silent: when true, load nothing and only report whether ``fname`` exists.

    Returns:
        ``bool`` in silent mode, otherwise a DataFrame. When rebuilt, its
        columns are ``depNum``, ``depName``, ``lon`` and ``lat``, extracted from
        the text files under ``./data/depfr-txt/``.
    """
    if silent:
        return os.path.isfile(fname)
    if not force and os.path.isfile(fname):
        return pd.read_csv(fname)

    files = glob.glob("./data/depfr-txt/*")
    # The departement number is the part of the file name before the first "-".
    filesDepDict = {path.split("/")[-1].split("-")[0]: path for path in files}
    # Materialise the items: a raw dict_items view is not accepted by every pandas version.
    dfDep = pd.DataFrame(list(filesDepDict.items()), columns=["depNum", "file"])
    dfDep["fileLines"] = dfDep.file.progress_apply(getLinesFromFileName)
    # Second line of each file holds the departement name.
    dfDep["depName"] = dfDep.fileLines.progress_apply(lambda a: a[1].strip())
    # Third line starts with two comma-separated coordinates.
    dfDep["latLon"] = dfDep.fileLines.progress_apply(lambda a: ",".join(a[2].split(",")[:2]))
    # NOTE(review): the "latLon" column is split into ["lon", "lat"] in that order —
    # confirm the coordinate order against the source files.
    depLatLon = dfDep >> df.separate(X.iloc[:, -1], ["lon", "lat"], sep=",")
    depLatLon = depLatLon.loc[:, ["depNum", "depName", "lon", "lat"]]
    depLatLon.to_csv(fname, index=False)
    return depLatLon
# Life expectancy per country.
# Returns a bool in silent mode (does the cache file exist?), otherwise a DataFrame
# that is either read from the cache `fname` or rebuilt from ./data/life_expectancy.csv.
def getDataLife(fname="./data/life_expectancy_world.csv",force=False,silent=False):
    # silent mode: only report whether the cached CSV exists
    if silent:
        return os.path.isfile(fname)
    # rebuild when forced or when the cache is missing
    if force or not os.path.isfile(fname):
        lifeExp=pd.read_csv("./data/life_expectancy.csv",
                            sep=";")
        # Pipeline: index on the first two columns, drop every column whose name
        # contains "." (presumably the per-sex duplicates — TODO confirm), drop the
        # "Country" label, keep the entries whose second index level is "2016",
        # discard that level, name the index "Country" and cast the values to float.
        lifeExpBothSexes=((lifeExp.set_index(lifeExp
                                             .columns[:2]
                                             .tolist()
                                             ) >>
                           df.select( ~df.contains(".") )) |__
                          .drop("Country") |__
                          .loc[(slice(None),"2016"),:] |__
                          .reset_index(1,drop=T) |__
                          .rename_axis("Country")
                          ).reset_index().set_index("Country").astype("float").reset_index()
        lifeExpBothSexes.to_csv(fname,index=False)
    else:
        lifeExpBothSexes=pd.read_csv(fname)
    return lifeExpBothSexes
def getDataDoctors(fname="./data/doctorsWorld.csv", force=False, silent=False):
    """Return the medical-doctors density per country, cached as a CSV at ``fname``.

    Args:
        fname: path of the cached CSV.
        force: rebuild the cache even when ``fname`` already exists.
        silent: when true, load nothing and only report whether ``fname`` exists.

    Returns:
        ``bool`` in silent mode, otherwise a DataFrame. When rebuilt from
        ``./data/medicalWorld.csv`` the table keeps, for each country, the
        first row of the first value column, and renames that column to
        ``MedicalDoctorsPer100000`` after multiplying it by 10 (presumably the
        source is expressed per 10,000 inhabitants — TODO confirm).
    """
    if silent:
        return os.path.isfile(fname)
    if not force and os.path.isfile(fname):
        return pd.read_csv(fname)

    # The builtins object/int/float replace np.object/np.int/np.float: those
    # aliases were removed in NumPy 1.24 and were plain aliases of the builtins.
    medicalWorld=(pd.read_csv("./data/medicalWorld.csv",
                              index_col=[0,1])
                  .iloc[:,:1]
                  # drop the row labelled ('Country', 'Year') (a repeated header row)
                  .drop(( 'Country', 'Year'))
                  .rename_axis(( 'Country', 'Year'))
                  .reset_index("Year")
                  .groupby("Country")
                  .first()
                  .reset_index()
                  .dropna() |__
                  # zip column names with target dtypes into a {column: dtype} dict
                  .astype((__.columns,[object,int,float]) |_funs_|
                          zip |_fun_|
                          dict)
                  .rename_cols(__.columns[:2].tolist()+["MedicalDoctorsPer100000"])
                  .mutate(MedicalDoctorsPer100000=X.MedicalDoctorsPer100000*10)
                  )
    medicalWorld.to_csv(fname,index=False)
    return medicalWorld
# Hospital beds, acute-care beds and ICU beds (per 100,000 inhabitants).
def getDataBeds(fname="./data/bedsWorld.csv", force=False, silent=False):
    """Return the merged hospital/acute/ICU beds table, cached as a CSV at ``fname``.

    Args:
        fname: path of the cached CSV.
        force: rebuild the cache even when ``fname`` already exists.
        silent: when true, load nothing and only report whether ``fname`` exists.

    Returns:
        ``bool`` in silent mode, otherwise a DataFrame. When rebuilt, three
        source files are combined: ``data/Hospital_beds_100000.csv``,
        ``data/icus.csv`` and ``data/bedsAsia.csv``; country names are
        harmonised before an outer merge, and the result is sorted by country.
    """
    if silent:
        return os.path.isfile(fname)
    if not force and os.path.isfile(fname):
        return pd.read_csv(fname)

    def firstRowPerCountry(table):
        # keep the first row found for each country
        return table.groupby("Country").first().reset_index()

    bedsColumns = ["Country", "acuteBeds/100000", "icuBeds/100000"]

    hospitals = firstRowPerCountry(pd.read_csv("data/Hospital_beds_100000.csv"))
    icuBeds = firstRowPerCountry(
        pd.read_csv("data/icus.csv", sep=";", usecols=range(3))
    ).set_axis(bedsColumns, axis=1)
    icuBedsAsia = firstRowPerCountry(
        pd.read_csv("data/bedsAsia.csv", sep=";", usecols=range(5))
    )

    # Derived Asian figures. The positional selection below relies on all three
    # derived columns being appended in this exact order.
    asiaDerived = (
        icuBedsAsia
        .mutate(criticalCareBedsPer100000Verif=(X.criticalCareBeds/X.popu*100000).round(1))
        .mutate(acuteHospitalBed=(X.criticalCareBeds/X.criticalCareBedsAsPercOfAcuteHospitalBeds*100).round(0))
        .mutate(acuteHospitalBedPer100000=(X.acuteHospitalBed/X.popu*100000).round(1))
    )
    icuBedAsia2 = asiaDerived.iloc[:, [0, 7, 3]].set_axis(bedsColumns, axis=1)
    acuteAndIcuBeds = pd.concat([icuBeds, icuBedAsia2], ignore_index=T)

    # Align long official country names on the short names used elsewhere.
    shortNames = {
        "Brunei Darussalam": "Brunei",
        "Czechia": "Czech Republic",
        "Democratic People's Republic of Korea": "North Korea",
        "Democratic Republic of the Congo": "Congo",
        "Iran (Islamic Republic of)": "Iran",
        "Lao People's Democratic Republic": "Laos",
        "Republic of Korea": "South Korea",
        "United Kingdom of Great Britain and Northern Ireland": "UK",
        "United States of America": "USA",
    }
    hostipalsBeds = hospitals.drop(["Year"], axis=1)
    hostipalsBeds["Country"] = hospitals["Country"].replace(shortNames)
    acuteAndIcuBeds["Country"] = acuteAndIcuBeds["Country"].replace("The Netherlands", "Netherlands")

    beds = hostipalsBeds.merge(acuteAndIcuBeds, how="outer").sort_values("Country")
    beds.to_csv(fname, index=F)
    return beds
def getDataPoliticalRegime(fname="./data/politicalRegimeWorld.csv", force=False, silent=False):
    """Return the political-regime score and category per entity, cached at ``fname``.

    Args:
        fname: path of the cached CSV.
        force: rebuild the cache even when ``fname`` already exists.
        silent: when true, load nothing and only report whether ``fname`` exists.

    Returns:
        ``bool`` in silent mode, otherwise a DataFrame. When rebuilt from
        ``./data/political-regime.csv`` the last row of each ``Entity`` is
        kept, rows scored -20 are dropped, and the score is binned into
        Autocracy / Closed Anocracy / Open Anocracy / Democracy.
    """
    if silent:
        return os.path.isfile(fname)
    if not force and os.path.isfile(fname):
        return pd.read_csv(fname)

    rawRegime = pd.read_csv("./data/political-regime.csv")
    latestPerEntity = rawRegime.groupby("Entity").last().reset_index()
    # renameCols / filter_by / mutate / cut are project extensions on pandas objects.
    scored = (latestPerEntity
              .renameCols({-1: "Political Regime"})
              .filter_by(X["Political Regime"] != -20))
    binEdges = [-11, -6, 0, 5, 10]
    binLabels = ["Autocracy",
                 "Closed Anocracy",
                 "Open Anocracy",
                 "Democracy"]
    categorised = scored.mutate(PoliticalRegimeCat=X["Political Regime"].cut(binEdges, binLabels))
    politicalRegime = categorised.iloc[:, [0, 2, 3, 4]]
    politicalRegime.to_csv(fname, index=False)
    return politicalRegime
# Useful functions for EDA.
def choroplethEda(df,
                  locationmode="country names",
                  locations="Country",
                  color_continuous_scale=px.colors.sequential.Bluered,
                  color=__.columns[-1],
                  tracesXargs={},
                  **xargs):
    """Draw a world choropleth of ``df`` coloured by ``color``.

    When ``locations`` is not a column of ``df`` the function falls back, in
    order, on "Country Name", "Entity" and finally "Country_Region".
    ``tracesXargs`` is forwarded to ``update_traces``; any other keyword
    argument goes to the choropleth call. ``tracesXargs`` is only unpacked,
    never modified.
    """
    for fallback in ("Country Name", "Entity", "Country_Region"):
        if locations in df.columns:
            break
        locations = fallback
    figurePipe = px.choropleth_(df,
                                locationmode=locationmode,
                                locations=locations,
                                color_continuous_scale=color_continuous_scale,
                                color=color,
                                **xargs)
    return df | figurePipe.update_traces(**tracesXargs)
choroplethEda_=addToPipe(choroplethEda)
# Easily create tabs with the absolute and the log-scaled choropleth, or a single plot.
# Returns the log figure when onlyLog is set, else the absolute figure when
# onlyAbsolute is set, else both figures wrapped by tabs(...).
def makeTabsEdaOrAlone(df,
                       logCol=None,
                       logName=None,
                       title="",
                       onlyLog=False,
                       onlyAbsolute=False,
                       hover_data=None,
                       logVarText="Log",
                       logText=" (log)",
                       logFun=np.log,
                       color=__.columns[-1],
                       tabsXargs={},
                       zeroNanOK=False,
                       logAdd1=True,
                       fnPlot=lambda a:a,
                       **xargs):
    # zeroNan / rien are project helpers (presumably "0 -> NaN" and identity — TODO confirm)
    fnNan=rien if not zeroNanOK else zeroNan
    # resolve `color` to a column name when it was given as a pipe placeholder/callable
    color= color if type(color) is str else callIfFn(df,callIfPipe(df, color,returnObj=T),returnObj=T)
    # by default the log is taken on the coloured column ...
    if logCol is None:
        logCol=callIfPipe(df,color,returnObj=True)
    # ... and stored in a new column named "<logCol><logVarText>"
    if logName is None:
        logName=logCol+logVarText
    # choropleth builder; defaults capture the current df/title/color
    funcEda=lambda df=df,title=title,color=color: df.mutate(**{color:fnNan(X[color])}) | choroplethEda_(hover_data=hover_data,title=title,color=color,**xargs)
    # absolute-scale figure
    fig1=callIfPipeY(callIfPipe(funcEda(),fnPlot),fnPlot)
    # log-scale figure: logFun(value + 1) when logAdd1, else logFun(value).
    # NOTE(review): built even when onlyAbsolute is requested.
    fig2=callIfPipeY(callIfPipe(funcEda(df.mutate(**{logName:logFun(df[logCol]+( 1 if logAdd1 else 0))}),
                                        title+logText,
                                        color=logName),fnPlot),fnPlot)
    if onlyLog:
        return fig2
    if onlyAbsolute:
        return fig1
    return tabs(fig1,fig2,**tabsXargs)
makeTabsEdaOrAlone_=addToPipe(makeTabsEdaOrAlone)
def countryProvinceToCountryAndCols(df, cols=["Deaths","Confirmed"], countryCol="Country_Region", colDate="Date"):
    """Collapse province-level rows into one row per (country, date).

    Args:
        df: table holding at least ``countryCol``, ``colDate`` and ``cols``.
        cols: columns to sum within each (country, date) group; never mutated.
        countryCol: name of the country column.
        colDate: name of the date column.

    Returns:
        DataFrame with columns ``[countryCol, colDate, *cols]``.
    """
    # Select the requested columns before summing: unrelated columns (province
    # names, categoricals, ...) are then neither summed needlessly nor able to
    # make the aggregation fail.
    return df.groupby([countryCol, colDate])[list(cols)].sum().reset_index()
# Pipe-friendly variant (wrapped with the project's addToPipe helper).
countryProvinceToCountryAndCols_=addToPipe(countryProvinceToCountryAndCols)
# Prepare the covid table for EDA: aggregate per (country, date), derive the
# per-date values from the cumulative column `col`, and rename so that
# `col + suffixCumulative` holds the cumulative series and `col + suffixAbsolute`
# the per-date one. `more` maps extra column names to their aggregation function.
def covidDataPreEda(df,
                    col="Deaths",
                    groupByCol="Country_Region",
                    colDate="Date",
                    cumulToOneSuffix="ByDate",
                    suffixCumulative="Cumulative",
                    suffixAbsolute="",
                    more={}):
    # NOTE(review): groupByCol / colDate are not forwarded to
    # countryProvinceToCountryAndCols_, which therefore uses its own defaults.
    # cumulToOne_ / addNbCount_ are project helpers (presumably "cumulative ->
    # per-date difference" and "add an `nb` counter column" — TODO confirm).
    data_=(df |
           countryProvinceToCountryAndCols_(cols=[col]+list(more.keys())) |
           cumulToOne_(colCumul=col,colSuffix=cumulToOneSuffix) |
           addNbCount_() |__
           .groupby([groupByCol,colDate]).aggKV(nb=sum,
                                                **more,
                                                **{col:sum,
                                                   col+cumulToOneSuffix:sum}
                                                )
           .reset_index()
           .renameCols({col:col+suffixCumulative,
                        col+cumulToOneSuffix:col+suffixAbsolute})
           )
    return data_
covidDataPreEda_=addToPipe(covidDataPreEda)
def showCountPlus(n1="",n2="",what="cas confirmés",sign="+",color="rgb(255, 153, 71)",
                  fontSize="35px",
                  fontSize2="20px",
                  fontSize3="25px"):
    """Render a counter card (main value, optional difference, caption) in an iframe.

    ``n1`` is the main value and ``n2`` the difference shown as ``( sign n2 )``.
    Either is omitted when it equals ``""``; non-string values are formatted
    with thousands separators. ``what`` is the caption; ``color`` and the three
    font sizes are injected into the embedded CSS.
    """
    template = """
<meta charset='utf8'>
<style>
body{
font-family: "Marianne",Arial;
font-size:{fontSize};
}
.counter.jsx-1203414269 {
display: flex;
flex-direction: column;
justify-content: space-around;
text-align: center;
color: {color};
margin: 1em 0px;
}
.difference{
font-size:{fontSize2};
}
.texto{
font-size:{fontSize3};
}
</style>
<div class="jsx-1203414269 counter"><div class="jsx-1203414269 value">{n1}
{n2}
<div class="jsx-1203414269 texto">{what}</div></div>
"""
    # main value: strings verbatim, numbers with thousands separators
    if n1 != "":
        mainValue = f"{n1}" if type(n1) is str else f"{n1:,}"
    else:
        mainValue = ""
    # difference block, same formatting rule
    if n2 != "":
        diffText = f"{n2}" if type(n2) is str else f"{n2:,}"
        diffBlock = f"<div class='jsx-1203414269 difference'>( {sign} {diffText} )</div>"
    else:
        diffBlock = ""
    # Placeholders are substituted one after the other, in this order.
    substitutions = (
        ("{n1}", mainValue),
        ("{n2}", diffBlock),
        ("{what}", what),
        ("{color}", color),
        ("{fontSize}", fontSize),
        ("{fontSize2}", fontSize2),
        ("{fontSize3}", fontSize3),
    )
    page = template
    for placeholder, value in substitutions:
        page = page.replace(placeholder, value)
    return iframe_from_html(page, height="100%")
# Functions used to join the covid data (Deaths, Confirmed) with the population of each country.
# searchPays: look for `pays` as a substring of the entries of `ddz`; returns the
# first [index, ddz[index]] pair found, or None when nothing matches.
def searchPays( pays,ddz):
    # 1) substring test on every entry, 2) positions of the True values,
    # 3) unwrap each position, 4) pair each position with the matching entry,
    # 5) keep the first pair (None when the list is empty)
    return ( ddz | _ftools_
            .mapl(lambda a:pays in a) |_fun_
            .np.argwhere(__) | _ftools_
            .mapl(lambda a: a[0] if len(a)>0 else []) | _ftools_
            .mapl(lambda a:[a,ddz[a]])) | addToPipe(lambda a:None if len(a)==0 else a[0])
# For every name in `feg` whose lower-cased form is absent from `ddz`, return
# [name, searchPays(name.lower(), ddz)], i.e. the name with its best substring
# match (or None).
def searchPaysNotJoined(feg, ddz):
    # argwhere(... == False) gives the positions of the names with no exact match
    return ( np.argwhere(
        feg
        .map(lambda a: np.where(ddz==a.lower())[0].shape[0]>0 ).values == False
    ).reshape(-1) |_ftools_
            .mapl(lambda a:feg[a])
            ) | _ftools_.mapl(lambda a:[a,searchPays(a.lower(),ddz)])
def joinCovidDeathsAndPopu(covidEdaTrainX, popuMondeTranches):
    """Attach the population columns of ``popuMondeTranches`` to the covid data.

    The covid table is summed per (``Country_Region``, ``Date``) and joined on
    the ``pays`` column of ``popuMondeTranches``. Countries whose name has no
    exact match get their ``popu`` from the first substring match found by
    ``searchPaysNotJoined``; "US" is mapped onto "United States of America".

    Returns:
        DataFrame with ``Country_Region`` as a categorical column.
    """
    popuByPays = popuMondeTranches.set_index("pays")
    fzf = (
        covidEdaTrainX.groupby(["Country_Region","Date"]).sum().reset_index("Date")
    ).join(popuByPays)
    ddz = popuMondeTranches.pays.str.lower().values
    feg = covidEdaTrainX.Country_Region.value_counts().index
    # countries without an exact (case-insensitive) name match, with their best candidate
    dg = searchPaysNotJoined(feg, ddz)
    for i, j in dg:
        if j is not None:
            # j[0] is the row position of the candidate in popuMondeTranches
            fzf.loc[i,"popu"] = popuMondeTranches.iloc[j[0],:].loc["popu"]
    # NOTE(review): placed after the loop because it does not depend on the loop
    # variables — confirm against the original layout.
    # Best-effort special case: skipped when either name is missing or the
    # lookup does not have the expected shape.
    try:
        fzf.loc["US","popu"] = popuByPays.loc["United States of America","popu"].iloc[0]
    except (KeyError, IndexError, AttributeError, TypeError, ValueError):
        pass
    dldl = fzf.reset_index().rename(columns={"index":"Country_Region"})
    dldl.Country_Region = dldl.Country_Region.astype("category")
    return dldl
# EDA for world data (Deaths and Confirmed).
def covidDataEda(df,
                 col=__.columns[-1],
                 cumulative=False,
                 groupByDate=None,
                 groupByCol="Country_Region",
                 suffixGrouped="Grouped",
                 suffixAbsolute="",
                 suffixCumulative="Cumulative",
                 log=False,
                 both=False,
                 bothCumulative=False,
                 boths=True,
                 hover_data=None,
                 colDate="Date",
                 animation_frame=True,
                 animation_frame_col="Date",
                 begin="start",
                 end="last",
                 title="Number of {col} worldwide",
                 groupbyAndDateXargs={},
                 zeroNan=True,
                 groupByText=" (par {groupByDate} jours)",
                 byPopu=True,
                 fnPlot=lambda a:a,
                 **xargs
                 ):
    """World choropleth(s) of ``col``: absolute/log and per-date/cumulative views.

    Args:
        df: covid table holding ``groupByCol``, ``colDate``, ``col`` and
            ``col + suffixCumulative`` (plus ``popu`` when ``byPopu``).
        col: column to plot, or a pipe placeholder resolved against ``df``.
        cumulative: only ``False`` is supported (``NotImplementedError`` otherwise).
        groupByDate: when set, aggregate by windows of that many days.
        log / both: log scale only, or absolute + log tabs.
        bothCumulative: also show the cumulative column in a second tab.
        boths: shortcut forcing ``both`` and ``bothCumulative``.
        hover_data: extra hover columns (defaults to ``[colDate]``); the
            caller's list is copied, never modified.
        animation_frame: animate over ``animation_frame_col``; otherwise a
            single snapshot at ``end`` is drawn.
        begin / end: time bounds ("start" / "last" or a date).
        byPopu: also show values as a percentage of the population.
        fnPlot, xargs: forwarded to ``makeTabsEdaOrAlone``.

    Returns:
        A figure or a ``tabs(...)`` container of figures.
    """
    def covidDataEda2(df=df,col=col,begin=begin, end=end,groupByCol=groupByCol,colDate=colDate,hover_data=hover_data,suffixAbsolute=suffixAbsolute,
                      both=both,boths=boths,log=log,bothCumulative=bothCumulative,title=title,animation_frame=animation_frame,
                      animation_frame_col=animation_frame_col,suffixCumulative=suffixCumulative,zeroNan=zeroNan, fnPlot=fnPlot,byPopu=byPopu,xargs=xargs):
        if byPopu:
            # keep the raw values in "<col>Base" and express the plotted ones as a
            # percentage of the population (popu is multiplied by 1000 here)
            gj=df
            XU=gj.mutate(**{col+"Base":X[col],
                            col+suffixCumulative+"Base":X[col+suffixCumulative]}).mutate(**{col: (X[col]/(X["popu"]*1000))*100,
                                                                                           col+suffixCumulative:(X[col+suffixCumulative]/(X["popu"]*1000))*100 })
            XU=XU.filter_by(X.popu.notna())
            XU[groupByCol]=XU[groupByCol].cat.remove_unused_categories()
            title+=" (par population %)"
            hover_data=[col+"Base",col+suffixCumulative+"Base",colDate]
            df=XU
        if not animation_frame:
            # single snapshot: last known row per country, or last row before `end`
            if end == "last":
                df=df.groupby(groupByCol).last().reset_index()
                title+=f" (~{ moment().date.strftime('%d %B %Y')})"
            else:
                end= moment(end).date
                df=(df.groupby(groupByCol).get_groups() | _ftools_.
                    mapl(lambda a:a.filter_by(X[colDate]< end).end()) |_fun_.
                    pd.concat(__,axis=0)
                    )
                title+=f" (~{end.strftime('%d %B %Y')})"
        else:
            xargs["animation_frame"]=animation_frame_col
        # NOTE(review): the `begin` filter is assumed to apply in both modes
        # (it sits at function level, as in getLinesEdaCovidByCountry) — confirm.
        if begin!="start":
            if type(begin) is moment_.core.Moment:
                begin=begin.date
            if type(begin) is datetime:
                df=(df.groupby(groupByCol).get_groups() | _ftools_.
                    mapl(lambda a:a.filter_by(X[colDate]>= begin)) |_fun_.
                    pd.concat(__,axis=0)
                    )
                title+=f" (start ~{begin.strftime('%d %B %Y')})"
        if hover_data is None:
            hover_data = [colDate]
        if colDate in hover_data:
            df=df | dateToStrDate_(colDate=colDate)
        if both or log:
            hover_data+=[col+suffixAbsolute]
        fig1=makeTabsEdaOrAlone(df,
                                color=col+suffixAbsolute,
                                title=title,
                                onlyLog=log,
                                onlyAbsolute=not both and not log,
                                hover_data=hover_data,
                                zeroNanOK=zeroNan,
                                fnPlot=fnPlot,
                                **xargs)
        if bothCumulative:
            hover_data+=[col+suffixCumulative]
            title+=" (Cumulative)"
            fig2=makeTabsEdaOrAlone(df,
                                    color=col+suffixCumulative,
                                    title=title,
                                    onlyLog=log,
                                    onlyAbsolute=not both and not log,
                                    hover_data=hover_data,
                                    zeroNanOK=zeroNan,
                                    fnPlot=fnPlot,
                                    **xargs)
            fig1=tabs(fig1,fig2,"Absolue","Cumulative",marginTop="-550px" if both and bothCumulative else '-500px')
        return fig1

    # resolve `col` to a column name when it was given as a pipe placeholder/callable
    col= col if type(col) is str else callIfFn(df,callIfPipe(df, col,returnObj=T),returnObj=T)
    title=fstr(title,col=col)
    groupCols={col:sum, col+suffixCumulative:np.max}
    if byPopu:
        groupCols["popu"]=np.max
    if boths:
        # plain booleans (the original trailing comma made `both` a one-element tuple)
        both=True
        bothCumulative=True
    if both:
        log=False
    if bothCumulative:
        cumulative=False
    # work on a private list so the caller's hover_data is never modified
    hover_data = [colDate] if hover_data is None else list(hover_data)
    if groupByDate is not None:
        df=df.groupbyAndDate(groupByCol,
                             ___.aggKV(nb=sum,
                                       **groupCols),
                             nbJ=groupByDate,**groupbyAndDateXargs)
        hover_data+=["nb","DateDebut"]
        # DateDebut = start of each window (window date minus groupByDate days)
        df=df.mutate(DateDebut=X.Date.apply(lambda a:moment(a).add(days=-groupByDate).date.strftime('%d/%m/%Y')),
                     **{groupByCol:X[groupByCol].astype("category")})
        title+=fstr(groupByText,groupByDate=groupByDate)
    if not cumulative:
        if byPopu:
            # two tabs: raw values, then values relative to the population
            fig1=covidDataEda2(df=df,title=title,bothCumulative=bothCumulative,both=both,col=col,log=log,hover_data=hover_data,byPopu=False)
            fig2=covidDataEda2(df=df,title=title,bothCumulative=bothCumulative,both=both,col=col,log=log,hover_data=hover_data)
            return tabs(fig1,fig2,"Absolue","ByPopu",marginTop="-550px")
        else:
            return covidDataEda2(df=df,title=title,bothCumulative=bothCumulative,both=both,col=col,log=log,hover_data=hover_data)
    else:
        raise NotImplementedError("only cumulative")
covidDataEda_=addToPipe(covidDataEda)
def getLinesEdaCovidByCountry(XU,suffixeCumulative="Cumulative",countryRegionCol="Country_Region",
                              colDate="Date",maxi=15,title="Nombre de {XUCol} par pays",
                              addLog=False,
                              begin="start",
                              byPopu=False,
                              add=[]):
    """Line chart of the cumulative column for the ``maxi`` most affected countries.

    The plotted column is ``XU.columns[-1] + suffixeCumulative``. Countries are
    ranked on the last value of that column; the top ``maxi`` plus any country
    listed in ``add`` are drawn.

    Args:
        XU: covid table; ``countryRegionCol`` must be categorical.
        suffixeCumulative: suffix of the cumulative column.
        countryRegionCol / colDate: country and date column names.
        maxi: number of top countries to draw.
        title: title template, ``{XUCol}`` being replaced by the plotted column.
        addLog: when true, return tabs with the absolute and the log chart.
        begin: "start", or a datetime / moment from which rows are kept.
        byPopu: plot the values as a percentage of ``popu`` (x1000).
        add: extra country names to always draw; never mutated.

    Returns:
        A line figure, or ``tabs(fig, figLog)`` when ``addLog`` is set.
    """
    XUCol=XU.columns[-1]+suffixeCumulative
    toSelect=[XUCol]
    hover_data=[]
    if byPopu :
        # keep the raw values in "<XUCol>Base" and plot the percentage of the population
        XU=XU.mutate(**{XUCol+"Base":X[XUCol]}).mutate(**{XUCol: (X[XUCol]/(X["popu"]*1000))*100})
        XU=XU.filter_by(X.popu.notna())
        XU=XU.mutate(**{XUCol:X[XUCol].replace(float("inf"),np.nan)}).dropna(axis=0)
        # use the configured country column instead of a hard-coded Country_Region
        XU[countryRegionCol]=XU[countryRegionCol].cat.remove_unused_categories()
        title+=" (par population %)"
        toSelect+=[XUCol+"Base"]
        hover_data=[XUCol+"Base"]
    if begin!="start":
        if type(begin) is moment_.core.Moment:
            begin=begin.date
        if type(begin) is datetime:
            XU=(XU.groupby(countryRegionCol).get_groups() | _ftools_.
                mapl(lambda a:a.filter_by(X[colDate]>= begin)) |_fun_.
                pd.concat(__,axis=0)
                )
            title+=f" (start ~{begin.strftime('%d %B %Y')})"
    lfel=XU.set_index([countryRegionCol,colDate]).select(toSelect)
    # countries ordered by decreasing last value of the plotted column
    flfl=np.argsort(XU.groupby([countryRegionCol]).last()[XUCol].values)[::-1]
    fname=XU[countryRegionCol].cat.categories[flfl]
    # computed once instead of once per row
    shown=fname[:maxi].tolist()+add
    figD=(lfel.reindex(fname,axis=0,level=0).reset_index()
          .filter_by(X[countryRegionCol].apply(lambda a:a in shown)))
    fig=(
        figD
        |
        px.line_(__,x=colDate,y=XUCol,color=countryRegionCol, hover_data=hover_data)
        .update_layout(title=fstr(title,XUCol=XUCol))
    )
    if addLog:
        fig2=(
            figD
            .mutate(**{XUCol+"Log":log_(X[XUCol])})
            |
            px.line_(__,x=colDate,y=XUCol+"Log",color=countryRegionCol, hover_data=hover_data+[XUCol])
            .update_layout(title=fstr(title+" (log)",XUCol=XUCol))
        )
        return tabs(fig,fig2)
    return fig
getLinesEdaCovidByCountry_=addToPipe(getLinesEdaCovidByCountry)
# Bar plot of `col` for one country, aggregated by windows of `by` days, with
# its growth rate on top; returns tabs with the per-window and the cumulative view.
# NOTE(review): `beginDate` is accepted but never used; the trace names are
# hard-coded for deaths ("Décès") whatever `col` is.
def barPlotCovidEdaBy(df,
                      pays,
                      by=1,
                      col=__.columns[-1],
                      #popu=None,
                      #byPopu=False,
                      beginPlot=None,
                      beginDate=None):
    # resolve `col` to a column name when it was given as a pipe placeholder/callable
    col= col if type(col) is str else callIfFn(df,callIfPipe(df, col,returnObj=T),returnObj=T)
    title=f"Nombre de {col}: {pays} et son taux de croissance par {by} jours"
    # rows of the requested country, aggregated per window of `by` days:
    # max of the cumulative column, sum of the per-date column
    dataF=(
        df
        .filter_by(X.Country_Region==pays)
        .select("Date",col,col+"Cumulative")
        .groupByDate("Date",by).aggKV(**{col+"Cumulative":np.max,
                                         col:np.sum})
    )
    # (a per-population variant was sketched here and is currently disabled)
    # optionally drop the windows before `beginPlot` (moment or datetime)
    if beginPlot is not None:
        if type(beginPlot) is moment_.core.Moment:
            beginPlot=beginPlot.date
        if type(beginPlot) is datetime:
            dataF=dataF.reset_index().filter_by(X.Date >=beginPlot).set_index(["Date"])
    # figure 1: growth rate (line) + per-window bars + cumulative points
    fig1= (
        (
            dataF
            # relative change between consecutive windows; NaN and inf become 0
            .mutate(**{col+"Rate":X[col].pct_change().fillna(0).replace({float("inf"):0})})
            .reset_index()
            |
            px.line_(__,x="Date",y=col+"Rate",color_discrete_sequence=["red"]).update_traces(name=f"Variation du Deces/{by}j",mode="markers+lines")
        )
        +
        (
            dataF
            .reset_index()
            |
            # bar offset/width are `by` days expressed in milliseconds
            px.bar_(__,x="Date",y=col,text=col).update_traces(name="Décès",offset=-by*24*60*60*1000,width=by*24*60*60*1000)
        )
        +
        (
            dataF
            .reset_index()
            |
            px.scatter_(__,x="Date",y=col+"Cumulative", color_discrete_sequence=["pink"]).update_traces(name="Nombre de décès cummulé")
        )
        |
        addSecondAxis_
        |
        addSlider_
        |
        showLegend_
        |__
        .update_layout(title=title)
    )
    title+=" (cummulé)"
    # figure 2: same layout on the cumulative column
    fig2=(
        (
            dataF
            .mutate(**{col+"Cumulative"+"Rate":X[col+"Cumulative"].pct_change().fillna(0).replace({float("inf"):0})})
            .reset_index()
            |
            px.line_(__,x="Date",y=col+"Cumulative"+"Rate",color_discrete_sequence=["red"]).update_traces(name=f"Variation du Deces Cummulé/{by}j",
                                                                                                         mode="markers+lines", yaxis="y2"
                                                                                                         )
        )
        +
        (
            dataF
            .reset_index()
            |
            px.bar_(__,x="Date",y=col+"Cumulative",text=col+"Cumulative").update_traces(offset=-by*24*60*60*1000,width=by*24*60*60*1000,name="Nombre de décès cummulé")
        )
        |
        addSecondAxis_
        |
        addSlider_
        |
        showLegend_
        |__
        .update_layout(title=title)
    )
    return tabs(fig1,fig2,"Absolue","Cummulé")
barPlotCovidEdaBy_=addToPipe(barPlotCovidEdaBy)
def graphPopu(popuMondeTranches,
              log=False,
              both=False):
    """World choropleth of the population per country.

    Args:
        popuMondeTranches: table with a ``pays`` column and a ``popu`` column
            (multiplied by 1000 before plotting).
        log: plot the log-scaled population only.
        both: return tabs with the absolute and the log plot (overrides ``log``).

    Returns:
        The absolute-scale figure by default, the log figure when ``log`` is
        set, or both as tabs when ``both`` is set. (The default case used to
        return the un-applied plotting lambda instead of a figure.)
    """
    title="Population par pays"
    # one row per country; popu is multiplied by 1000 before plotting
    data_=(popuMondeTranches
           .groupby(["pays"]).first().reset_index()
           .mutate( popu= X.popu*1000 ))
    figA=lambda *args,**xargs:makeTabsEdaOrAlone_(color="popu",
                                                  title=title,
                                                  locations="pays",
                                                  *args,**xargs)
    if both:
        return data_ | figA()
    if log:
        return data_ | figA(onlyLog=True)
    return data_ | figA(onlyAbsolute=True)
graphPopu_=addToPipe(graphPopu)
# World choropleth of the population per age bracket, with a dropdown to switch
# between the five brackets. With byPopu the colour is the bracket's share of the
# country's population; with both, the absolute and the byPopu figures are
# returned as tabs.
def graphPopuTranchesAge(popu,
                         byPopu=False,
                         both=False,
                         **xargs):
    # long -> wide: one column per age bracket; popu multiplied by 1000
    pop=popu >> df.spread("tranches_age","value") >> df.mutate(popu=X.popu*1000)
    title="Population dans le Monde "
    # bracket codes and their display labels, in the same order
    tranches=["A","B","C","D","E"]
    labelsTranches=["Moins de 15 ans","15-44","45-64","65-74","Plus de 75 ans"]
    maskTranches=[False]*len(tranches)
    if both:
        return tabs(graphPopuTranchesAge(popu,byPopu=False,both=False,**xargs),
                    graphPopuTranchesAge(popu,byPopu=True,both=False,**xargs),
                    fig2Name="ByPopu")
    if byPopu:
        # "<bracket>ByPopu" = percentage of the country's population in that bracket
        pop=pop >> df.group_by("pays") >> df.mutate(**{i+"ByPopu":X[i]*1000/X.popu*100 for i in tranches
                                                       },**{i:X[i]*1000 for i in tranches
                                                            })
        title+=" (par rapport à leur popu %)"
    else:
        pop[tranches]=pop[tranches]*1000.
    # one choropleth pipe per bracket
    listTranches=_fun_.listl(*[ choroplethEda_(locations="pays",
                                               color=i+"ByPopu",
                                               custom_data=[i],
                                               tracesXargs=dict(hovertemplate="pays=%{location}<br>"+i+'ByPopu=%{z}%<br>popu=%{customdata[0]:s}')) if byPopu else choroplethEda_(locations="pays",
                                                                                                                                                                         color=i)
                               for i in tranches ])
    # merge the per-bracket figures into a single one
    reduceAdd = _ftools_.reduce(go.Figure.__add__)
    # pipe step returning a copy of the mask with position i set to True
    def setTrue_(i):
        def setTrue__(df):
            f=list(df)
            f[i]=True
            return f
        return addToPipe(setTrue__)
    # all traces hidden, then only the first bracket shown
    fig=(pop |
         listTranches |
         reduceAdd
         .update_traces(visible=False))
    _=fig.data[0].update(visible=True)
    # one dropdown button per bracket: shows its trace and updates the title
    buttons_=[dict(label=label,
                   method="update",
                   args=[{"visible": maskTranches | setTrue_(i)},
                         {"title": title+" "+label}]) for i,label in enumerate(labelsTranches) ]
    return fig.update_layout(
        title=title+" : Moins de 15 ans",
        updatemenus=[
            dict(
                active=0,
                buttons=buttons_,
            )
        ])
graphPopuTranchesAge_=addToPipe(graphPopuTranchesAge)
# Daily Deaths / Confirmed totals of one country, read from the module-level
# `covidEdaTrainX` table. Returns a frame without the Country_Region column.
def getCountryFromData(country):
    # NOTE(review): the global is only read here, so the declaration is not required
    global covidEdaTrainX
    # filter the country, sum per (country, date), slice that country and flatten
    return (covidEdaTrainX >>
            df.filter_by(X.Country_Region == country) |__
            .groupby(["Country_Region","Date"]).agg(dict(Deaths=sum,Confirmed=sum)) |__
            [slice(country, country)] |__
            .reset_index() |__
            .drop(["Country_Region"],axis=1))
# Country data plus `d_Deaths_dt`, the numerical gradient of Deaths
# (np.gradient), interpolated over missing values.
def getCountryAndDtFromData(country):
    vv=getCountryFromData(country)
    return vv >> df.mutate(d_Deaths_dt=np.gradient(vv.Deaths)) >> df.mutate(d_Deaths_dt=X.d_Deaths_dt.interpolate())
# Country data plus derived series:
#   d_Deaths_dt            gradient of Deaths (interpolated, NaN -> 0)
#   deces_jour             Deaths minus the previous row's Deaths
#   deces_pct_change       relative change of Deaths (NaN -> 0)
#   deces_pct_change_shift relative change of deces_jour (NaN -> 0)
def getCountryAndDtsFromData(country):
    vv=getCountryFromData(country)
    return vv >> df.mutate(d_Deaths_dt=np.gradient(vv.Deaths),
                           deces_jour=(X.Deaths - X.Deaths.shift())) >> df.mutate(d_Deaths_dt=X.d_Deaths_dt.interpolate().fillna(0),
                                                                                  deces_pct_change=(X.Deaths).pct_change().fillna(0),
                                                                                  deces_pct_change_shift=(X.deces_jour).pct_change().fillna(0))
# Deaths of one country aggregated by windows of `nbDays` days: bars for the
# deaths per window, points for the cumulative deaths and a line for the
# window-to-window variation (second y axis). `log` switches the y axis to log.
def graphDeathBy(pays, nbDays=1, log=False):
    nbI=nbDays
    # NOTE(review): `le` is never used
    le="d"
    title=pays
    argsO = {} if not log else dict(yaxis_type="log")
    if log:
        title+=" (log)"
    # per-window aggregation, then:
    #   Deaths_shift            deaths per window rescaled to a full window (deces_jour*nbI/nb)
    #   deces_pct_change_shift2 relative change of Deaths_shift
    #   Date_Deb                window start, formatted for the hover text
    data1=(
        (getCountryAndDtsFromData(pays) >> df.mutate(nb=1) |__
         .groupByDate("Date",nbDays, label='right', closed='right').agg({
             "Deaths":max,
             "nb":sum,
             "deces_jour":sum,
             "deces_pct_change":np.mean})).reset_index() >>
        df.mutate(Deaths_shift=(X.deces_jour*nbI/X.nb).replace({float("inf"):np.nan}).interpolate().fillna(0))>>
        # NOTE(review): subtract(day=nbI) — confirm the keyword expected by the moment library
        df.mutate(deces_pct_change_shift2=X.Deaths_shift.pct_change().replace({float("inf"):np.nan}).interpolate().fillna(0),
                  Date_Deb=X.Date.apply(lambda b:moment(b.timestamp()).subtract(day=nbI).date.strftime("%b %d, %Y")) ) >>
        df.select("Date","Date_Deb",df.contains("Deaths"),df.contains("deces"),"nb")
    )
    # build the three traces from the same data, add them up, then decorate the figure
    return (
        data1 |__fun__
        .listl(
            px.line_(__,
                     x="Date",
                     y="deces_pct_change_shift2",
                     color_discrete_sequence=["red"]).update_traces(name=f"Variation du Deces/{nbI}j",mode="markers+lines"),
            px.scatter_(__,
                        x="Date",
                        y="Deaths",
                        color_discrete_sequence=["pink"]).update_traces(name=f"Décès Cumul"),
            px.bar_(__,
                    x="Date",
                    y="deces_jour",
                    text="deces_jour",
                    custom_data=["Date_Deb","Deaths","nb"]
                    # bar offset/width are nbI days expressed in milliseconds
                    ).update_traces(name=f"Deces/{nbI}j",
                                    offset=-nbI*24*60*60*1000,
                                    width=nbI*24*60*60*1000,
                                    hovertemplate=hoverTemplate("Date Début ",
                                                                "Décès Cumulés ",
                                                                "Nombre de jours ",
                                                                x="x [Date] ",
                                                                y=f"y [Décès/{nbI}j]",dicoFirst=True))
        ) |_ftools_
        .reduce(lambda a,b:a+b) |
        addSecondAxis_ |
        addSlider_ |
        showLegend_ |__.
        update_layout(title=title,
                      yaxis=dict(title=f"Nombre de décès/{nbI}j",
                                 titlefont=dict(
                                     color="blue"
                                 ),
                                 tickfont=dict(
                                     color="blue"
                                 )),
                      yaxis2=dict(title=f"Variation du Nombre de décès/{nbI}j",
                                  titlefont=dict(
                                      color="red"
                                  ),
                                  tickfont=dict(
                                      color="red"
                                  )),**argsO)
    )
graphDeathBy_=addToPipe(graphDeathBy)
# Animated world choropleth of the deaths per country (one frame per date).
# groupByDate aggregates by windows of that many days (optionally re-cumulated
# with `cummulative`); `log` plots the log values, `both` returns absolute + log
# tabs; only rows dated after `debut` are drawn.
def graphDeaths(covidEdaTrainX,groupByDate=None,debut=moment(2010,2,25).date,log=False,both=False,noNan=False,
                cummulative=False):
    # deaths summed per (country, date)
    XXF=covidEdaTrainX.groupby(["Country_Region","Date"]).aggKV(Deaths=sum).reset_index()
    if both:
        log=False
    # rien / log_ / zeroNan are project helpers (presumably identity, log and
    # "0 -> NaN" — TODO confirm)
    fnLog=rien if not log else log_
    fnNan=rien if noNan else zeroNan
    title="Nombre de décès par pays"
    if log:
        title+=' (log)'
    # NOTE(review): nesting reconstructed — the cumulative step and the
    # "(par N jours)" title suffix are assumed to belong to this branch.
    if groupByDate is not None:
        # per country: day-to-day difference of the cumulative deaths, then sum
        # per window of `groupByDate` days, then concatenate all countries
        XXF=(
            ((covidEdaTrainX | __
              .groupby(["Country_Region","Date"]).aggKV(Deaths=sum).reset_index("Date")) >>
             df.group_by("Country_Region") >> df.mutate(Deaths=(X.Deaths - X.Deaths.shift()).fillna(0).mini(0))) | addToPipe(
                lambda a: a.groupby('Country_Region').groups.items() % _ftools_
                .mapl(lambda i: a.loc[i[0]].reset_index().groupByDate("Date",label="right",nbI=groupByDate).aggKV(Deaths=sum).reset_index() | __
                      .set_axis( [i[0]]*__.shape[0],inplace=F) )) |_fun_.
            pd.concat
        )
        XXF=XXF.reset_index().rename(columns={"index":"Country_Region"})
        if cummulative:
            XXF=XXF >>df.group_by(X.Country_Region) >> df.mutate(Deaths=df.cumsum(X.Deaths))
            title+= " (cummul)"
        title=title+f" (par {groupByDate} jours)"
    # main figure: Date_D is the frame label; Deaths goes through fnNan then fnLog
    figA=((XXF >>
           df.mutate(Date_D=X.Date.dt.strftime("%d-%m-%y"),
                     Deaths=fnNan(X.Deaths)) >>
           df.mutate(Deaths=fnLog(X.Deaths))>>
           df.filter_by( X.Date > debut )
           )|_fun_.
          px.choropleth(__,
                        animation_frame="Date_D",
                        locations="Country_Region",
                        color_continuous_scale=px.colors.sequential.Bluered,
                        locationmode="country names",
                        color="Deaths") |__
          .update_layout(title=title))
    if both:
        # second figure: same pipeline with the log applied
        fnLog=log_
        title+=" (log)"
        fig2=((XXF >>
               df.mutate(Date_D=X.Date.dt.strftime("%d-%m-%y"),
                         Deaths=fnNan(X.Deaths)) >>
               df.mutate(Deaths=fnLog(X.Deaths))>>
               df.filter_by( X.Date > debut )
               )|_fun_.
              px.choropleth(__,
                            animation_frame="Date_D",
                            locations="Country_Region",
                            color_continuous_scale=px.colors.sequential.Bluered,
                            locationmode="country names",
                            color="Deaths") |__
              .update_layout(title=title))
        figA=tabs(figA,fig2)
    return figA
# World choropleth of the deaths as a percentage of each country's population.
# Without animation a single snapshot is drawn (last row, or first row on/after
# `date`); with animation (forced when groupByDate is set) there is one frame
# per date. `log` / `both` behave as in graphDeaths.
# NOTE(review): **args is accepted but never used.
def graphDeathsByPopu(covidEdaTrainX, popuMondeTranches,groupByDate=None, log=False, both=False, animation=False, date="last",
                      debut=moment(2010,2,25).date,cummulative=False,**args):
    gj=joinCovidDeathsAndPopu(covidEdaTrainX, popuMondeTranches)
    title="Nombre de décès par pays par rapport à leurs populations"
    opts={}
    # frame label formatter
    fnDateD= lambda X:X.dt.strftime("%d-%m-%y")
    if groupByDate:
        animation=True
    if not animation:
        if date=="last":
            gj=gj.groupby(["Country_Region","tranches_age"]).last().reset_index()
            title+=" (lastDay)"
        else:
            gj = (gj >> df.filter_by(X.Date >= date)) | __.groupby(["Country_Region","tranches_age"]).first().reset_index()
            title+=f" (~{date.strftime('%d-%m-%y')})"
    else:
        opts["animation_frame"]="Date_D"
        title+=" (byDate)"
    if both:
        log=False
    fnLog=log_ if log else rien
    if log:
        title+=" (log)"
    # NOTE(review): nesting reconstructed — the cumulative step and the
    # "(par N jours)" title suffix are assumed to belong to this branch.
    if groupByDate is not None:
        # per country: day-to-day difference of the cumulative deaths, then per
        # window of `groupByDate` days the sum of deaths and the max of popu
        gj=(
            ((gj | __
              .groupby(["Country_Region","Date"]).aggKV(Deaths=sum,popu=np.max).reset_index("Date")) >>
             df.group_by("Country_Region") >> df.mutate(Deaths=(X.Deaths - X.Deaths.shift()).fillna(0).mini(0))) | addToPipe(
                lambda a: a.groupby('Country_Region').groups.items() % _ftools_
                .mapl(lambda i: a.loc[i[0]].reset_index().groupByDate("Date",label="right",nbI=groupByDate).aggKV(Deaths=sum,popu=np.max).reset_index() | __
                      .set_axis( [i[0]]*__.shape[0],inplace=F) )) |_fun_.
            pd.concat
        )
        gj=gj.reset_index().rename(columns={"index":"Country_Region"})
        if cummulative:
            gj=gj >> df.group_by(X.Country_Region) >> df.mutate(Deaths=df.cumsum(X.Deaths))
            title+= " (cummul)"
        title=title+f" (par {groupByDate} jours)"
    # clip Deaths at 0 (element-wise max with 0)
    # NOTE(review): assumed to apply whether or not groupByDate is set — confirm.
    gj=gj>>df.mutate(Deaths=make_symbolic(lambda a:a.transform(lambda b:np.max([b,0])))(X.Deaths))
    # main figure: Deaths_by_popu = Deaths / (popu*1000) * 100, optionally logged;
    # a version rounded to 4 decimals is added for the hover text
    figA= ((
        gj.reset_index() >>
        df.filter_by( X.Date > debut ) >>
        df.mutate(Deaths_by_popu=X.Deaths/(X.popu*1000)*100.0,Date_D=fnDateD(X.Date)) >>
        df.mutate(Deaths_by_popu=fnLog(X.Deaths_by_popu)) >>
        df.mutate(**{"Deaths_by_popu (%)":make_symbolic(lambda a:np.round(a,4))(X.Deaths_by_popu)})) |
           px.choropleth_(__,
                          locations="Country_Region",
                          hover_data=["Date_D","Deaths","popu","Deaths_by_popu (%)"],
                          locationmode="country names",
                          color_continuous_scale=px.colors.sequential.Bluered,
                          color="Deaths_by_popu",**opts) |__
           .update_layout(title=title))
    if both:
        # second figure: same pipeline with the log applied
        fnLog=log_
        title+=" (log)"
        fig2= ((
            gj.reset_index() >>
            df.filter_by( X.Date > debut ) >>
            df.mutate(Deaths_by_popu=X.Deaths/(X.popu*1000)*100.0,Date_D=fnDateD(X.Date)) >>
            df.mutate(Deaths_by_popu=fnLog(X.Deaths_by_popu))) |
               px.choropleth_(__,
                              locations="Country_Region",
                              hover_data=["Date_D","Deaths","popu"],
                              **opts,
                              locationmode="country names",
                              color_continuous_scale=px.colors.sequential.Bluered,
                              color="Deaths_by_popu") |__
               .update_layout(title=title))
        figA=tabs(figA,fig2)
    return figA
# Custom preparation function.
def as_int2(self, li):
    """Convert the column(s) ``li`` of ``self._data`` to ``int`` in place.

    ``li`` may be a single column label or an iterable of labels (a string
    counts as a single label). Each column's values first go through the
    project helper ``unNamesEscape``, then the column is cast to ``int``.

    Returns:
        ``self``, so the call can be chained.
    """
    # wrap a lone label (including any string) into a list
    if isinstance(li, str) or not isinstance(li, collections.abc.Iterable):
        li = [li]
    cleaned = self._data[li].apply(lambda a: unNamesEscape(a.values), axis=0)
    self._data[li] = cleaned
    self._data[li] = self._data[li].astype("int")
    return self
Regroupe les données pour un projet
# Fetch the "covid" project, or create it, via StudyProject.getOrCreate.
Covidproj = StudyProject.getOrCreate("covid")
On liste les données disponibles dans le projet
# Notebook display: the keys of Covidproj.data.
list(Covidproj.data.keys())
On vérifie si des mises à jour des données sont nécessaires
def preTCovidTemporalAllCountry(covidData):
    """Normalise the column types of the raw covid table, in place.

    ``Date`` is parsed with ``pd.to_datetime``; ``Country_Region`` and
    ``Province_State`` become categorical.

    Returns:
        The same ``covidData`` object, modified.
    """
    covidData.Date = pd.to_datetime(covidData.Date)
    for categoricalCol in ("Country_Region", "Province_State"):
        covidData[categoricalCol] = covidData[categoricalCol].astype("category")
    return covidData
# Dataset registries. Each zipl(...) call combines parallel lists, one entry per
# dataset. From the values: 1st list = dataset id, 2nd = readiness check (a
# callable, or T), 3rd = a name for the data; the remaining lists carry loaders
# and flags. NOTE(review): the exact meaning of each position is defined by
# getDataX_/getDataXX_, which are not visible here — confirm there.

# Covid datasets (7 fields per entry; the last one is an optional pre-treatment).
covidDatas=(
    zipl(
        ["covidTemporalAllCountry","frHospit","tests"],
        [lambda:getData(silent=True),lambda:getDataHospitFrance(silent=True),T],
        ["covidData","frHospit","testsWorld"],
        [False,False,True],
        [getData,getDataHospitFrance,getDataTests],
        [True,True,False],
        [preTCovidTemporalAllCountry,None,None]
    )
)
# Demography datasets (4 fields per entry; the last one is the loader).
demographyDatas=(
    zipl(
        ["popuMondeTranches","urba","medianAge","popuDensity"],
        [T,T,T,T],
        ["popuMondeTranches","tauxUrbaWold","medianAgeWorld","popuDensityWorld"],
        [getDataPopu,getDataUrba,getDataMedianAge,getDataPopuDensity]
    )
)
# Health datasets; the last two loaders read their CSV directly.
healtyDatas=(
    zipl(
        ["lifeExpectancy","beds","doctors","tauxDeces","surpoisData"],
        [T,T,T,T,T],
        ["lifeExp","beds","doctors","tauxDecesWorld","surpoisData"],
        [getDataLife,
         getDataBeds,
         getDataDoctors,
         lambda:pd.read_csv("data/DeathsRateWorld.csv"),
         lambda:(pd.read_csv("data/surpois.csv",sep=";",usecols=range(2),na_values="..",index_col=0)
                 .dropna().rename_axis(index="Country").reset_index())]
    )
)
# Political datasets.
politicalDatas=(
    zipl(
        ["politicalRegime"],
        [T],
        ["politicalRegimeWorld"],
        [getDataPoliticalRegime]
    )
)
# France-specific datasets.
franceDatas=(
    zipl(
        ["depLatLong"],
        [T],
        ["depLatLong"],
        [getDepartementsLatLon]
    )
)
# First pass: pipe every registry through getDataX_ (project helper, not visible
# here; presumably a status check per dataset — TODO confirm).
# Covid
covidDatas | getDataX_(4)
# if "covidDataFr" in Covidproj.data and okDataFr:
# print("covidDataFr déjà dans le projet")
# else:
# print("/!\ covidDataFr pas exporté")
# Demography
demographyDatas | getDataX_()
# Health
healtyDatas | getDataX_()
# Political
politicalDatas | getDataX_()
# France (the trailing ";" hides the cell output in the notebook)
franceDatas | getDataX_();
# Second pass: pipe every registry through getDataXX_ (project helper, not
# visible here; presumably loads/saves whatever is missing — TODO confirm).
# Covid
covidDatas | getDataXX_(4)
# French hospital data (disabled)
# if "covidDataFr" not in Covidproj.data or not okDataFr:
# print("covidDataFr not in project")
# covidData = getDataFr()
# covidData.date = pd.to_datetime(covidData.date)
# Covidproj.saveDatasWithId("covidDataFr",covidData,pd.Series(name="__fake__"),
# pd.DataFrame(),pd.Series(name="__fake2__"))
# Covidproj.export()
# print("ok")
# Demography
demographyDatas | getDataXX_()
# Health
healtyDatas | getDataXX_()
# Political
politicalDatas | getDataXX_()
# France
franceDatas | getDataXX_();
On crée une study (étude) pour chaque jeu de données que l'on va utiliser (une étude -> un jeu de données)
# Study bound to the world covid time series.
covidEda=Covidproj.addOrGetStudy("covidEda")
# (Re)attach the data when the study has none, or when the readiness check of the
# first covid dataset (getData(silent=True), i.e. "is the cache file there?") fails.
if covidEda.datas is None or not covidDatas[0][1]():#or T:
    print("set covidTemporalAllCountry to Study")
    covidEda.setDataTrainTest(id_="covidTemporalAllCountry")
    Covidproj.export()
# notebook display of the study
covidEda
# covidEdaFr=Covidproj.addOrGetStudy("covidEdaFr")
# if covidEdaFr.datas is None or not okDataFr:#or T:
# print("set covidDataFr to Study")
# covidEdaFr.setDataTrainTest(id_="covidDataFr")
# Covidproj.export()
# covidEdaFr
# Study bound to the French hospital data; same refresh rule, using the
# readiness check of the second covid dataset.
frHospit=Covidproj.addOrGetStudy("frHospit")
if frHospit.datas is None or not covidDatas[1][1]():#or T:
    print("set frHospit to Study")
    frHospit.setDataTrainTest(id_="frHospit")
    Covidproj.export()
# notebook display of the study
frHospit
# ECDC case distribution, downloaded on every run (network access required).
covidDataPlus=pd.read_csv("https://opendata.ecdc.europa.eu/covid19/casedistribution/csv",index_col="countriesAndTerritories")
# Tell the user the initialisation is over, then clear the cell output.
display_html("""
<script>
alert('Initialisation Effectuée')
</script>
""")
clear_output()